зеркало из https://github.com/mozilla/pjs.git
Bug 389056 Don't break line between periods and quote/Bug 388096 add line-breaking opportunity after U+058A ARMENIAN HYPHEN, U+2010 HYPHEN, U+2012 FIGURE DASH and U+2013 EN DASH. And U+2014 EM DASH should be breakable before and after except that between the same characters/Bug 389595 Don't break around '(' and ')'/Bug 390920 Don't break around r+sr=roc, b1.9=dbaron
This commit is contained in:
Родитель
e33c93552c
Коммит
89308a5024
|
@ -40,6 +40,7 @@
|
|||
|
||||
#include "nsString.h"
|
||||
#include "nsTArray.h"
|
||||
#include "nsILineBreaker.h"
|
||||
|
||||
class nsIAtom;
|
||||
|
||||
|
@ -81,10 +82,7 @@ public:
|
|||
nsLineBreaker();
|
||||
~nsLineBreaker();
|
||||
|
||||
static inline PRBool IsSpace(PRUnichar u)
|
||||
{
|
||||
return u == 0x0020 || u == 0x200b/*ZWSP*/ || u == '\n' || u == '\t';
|
||||
}
|
||||
// Space test used by nsLineBreaker; simply forwards to the shared
// NS_IsSpace() helper.
static inline PRBool IsSpace(PRUnichar u)
{
  return NS_IsSpace(u);
}
|
||||
|
||||
static inline PRBool IsComplexASCIIChar(PRUnichar u)
|
||||
{
|
||||
|
|
|
@ -72,4 +72,18 @@ public:
|
|||
|
||||
NS_DEFINE_STATIC_IID_ACCESSOR(nsILineBreaker, NS_ILINEBREAKER_IID)
|
||||
|
||||
// Returns nonzero when |u| is one of the space characters listed below.
//
//   U+0020            SPACE
//   U+0009            CHARACTER TABULATION
//   U+000D            CARRIAGE RETURN
//   U+2000 .. U+2006  EN QUAD, EM QUAD, EN SPACE, EM SPACE,
//                     THREE-PER-EM SPACE, FOUR-PER-EM SPACE,
//                     SIX-PER-EM SPACE
//   U+2008 .. U+200B  PUNCTUATION SPACE, THIN SPACE, HAIR SPACE,
//                     ZERO WIDTH SPACE
//   U+3000            IDEOGRAPHIC SPACE
//
// U+2007 (FIGURE SPACE) and U+000A (LINE FEED) are not in the list.
static inline PRBool
NS_IsSpace(PRUnichar u)
{
  // The General Punctuation spaces form one run, U+2000..U+200B, with a
  // single hole at U+2007.
  if (0x2000 <= u && u <= 0x200B)
    return u != 0x2007;

  return u == 0x0020 || u == 0x0009 || u == 0x000D || u == 0x3000;
}
|
||||
|
||||
#endif /* nsILineBreaker_h__ */
|
||||
|
|
|
@ -43,104 +43,104 @@ static const PRUint32 gLBClass00[32] = {
|
|||
0x55555555, // U+0008 - U+000F
|
||||
0x55555555, // U+0010 - U+0017
|
||||
0x55555555, // U+0018 - U+001F
|
||||
0x88108815, // U+0020 - U+0027
|
||||
0x11118810, // U+0028 - U+002F
|
||||
0x7AABAAA5, // U+0020 - U+0027
|
||||
0x7A7AAAA9, // U+0028 - U+002F
|
||||
0x66666666, // U+0030 - U+0037
|
||||
0x11101866, // U+0038 - U+003F
|
||||
0x88888888, // U+0040 - U+0047
|
||||
0x88888888, // U+0048 - U+004F
|
||||
0x88888888, // U+0050 - U+0057
|
||||
0x88100888, // U+0058 - U+005F
|
||||
0x88888888, // U+0060 - U+0067
|
||||
0x88888888, // U+0068 - U+006F
|
||||
0x88888888, // U+0070 - U+0077
|
||||
0x88180888, // U+0078 - U+007F
|
||||
0x88888888, // U+0080 - U+0087
|
||||
0x88888888, // U+0088 - U+008F
|
||||
0x88888888, // U+0090 - U+0097
|
||||
0x88888888, // U+0098 - U+009F
|
||||
0x88383488, // U+00A0 - U+00A7
|
||||
0x88888888, // U+00A8 - U+00AF
|
||||
0x88888881, // U+00B0 - U+00B7
|
||||
0x88888888, // U+00B8 - U+00BF
|
||||
0x88888888, // U+00C0 - U+00C7
|
||||
0x88888888, // U+00C8 - U+00CF
|
||||
0x88888888, // U+00D0 - U+00D7
|
||||
0x88888888, // U+00D8 - U+00DF
|
||||
0x88888888, // U+00E0 - U+00E7
|
||||
0x88888888, // U+00E8 - U+00EF
|
||||
0x88888888, // U+00F0 - U+00F7
|
||||
0x88888888, // U+00F8 - U+00FF
|
||||
0x1AA9AA66, // U+0038 - U+003F
|
||||
0x77777777, // U+0040 - U+0047
|
||||
0x77777777, // U+0048 - U+004F
|
||||
0x77777777, // U+0050 - U+0057
|
||||
0x77AA9777, // U+0058 - U+005F
|
||||
0x77777777, // U+0060 - U+0067
|
||||
0x77777777, // U+0068 - U+006F
|
||||
0x77777777, // U+0070 - U+0077
|
||||
0x7AAA9777, // U+0078 - U+007F
|
||||
0x77777777, // U+0080 - U+0087
|
||||
0x77777777, // U+0088 - U+008F
|
||||
0x77777777, // U+0090 - U+0097
|
||||
0x77777777, // U+0098 - U+009F
|
||||
0xAA9A9AAB, // U+00A0 - U+00A7
|
||||
0x77A9A77A, // U+00A8 - U+00AF
|
||||
0xAAAAAAAA, // U+00B0 - U+00B7
|
||||
0xAAAAAAAA, // U+00B8 - U+00BF
|
||||
0x77777777, // U+00C0 - U+00C7
|
||||
0x77777777, // U+00C8 - U+00CF
|
||||
0x77777777, // U+00D0 - U+00D7
|
||||
0x77777777, // U+00D8 - U+00DF
|
||||
0x77777777, // U+00E0 - U+00E7
|
||||
0x77777777, // U+00E8 - U+00EF
|
||||
0xA7777777, // U+00F0 - U+00F7
|
||||
0x77777777, // U+00F8 - U+00FF
|
||||
};
|
||||
|
||||
static const PRUint32 gLBClass20[32] = {
|
||||
0x55555555, // U+2000 - U+2007
|
||||
0x88885555, // U+2008 - U+200F
|
||||
0x88828888, // U+2010 - U+2017
|
||||
0x88888888, // U+2018 - U+201F
|
||||
0x81118888, // U+2020 - U+2027
|
||||
0x88888888, // U+2028 - U+202F
|
||||
0x88884444, // U+2030 - U+2037
|
||||
0x88815888, // U+2038 - U+203F
|
||||
0x88818888, // U+2040 - U+2047
|
||||
0x88888888, // U+2048 - U+204F
|
||||
0x88888888, // U+2050 - U+2057
|
||||
0x88888888, // U+2058 - U+205F
|
||||
0x88888888, // U+2060 - U+2067
|
||||
0x88888888, // U+2068 - U+206F
|
||||
0x88888888, // U+2070 - U+2077
|
||||
0x88888888, // U+2078 - U+207F
|
||||
0x88888888, // U+2080 - U+2087
|
||||
0x88888888, // U+2088 - U+208F
|
||||
0x88888888, // U+2090 - U+2097
|
||||
0x88888888, // U+2098 - U+209F
|
||||
0x88888888, // U+20A0 - U+20A7
|
||||
0x88888888, // U+20A8 - U+20AF
|
||||
0x88888888, // U+20B0 - U+20B7
|
||||
0x88888888, // U+20B8 - U+20BF
|
||||
0x88888888, // U+20C0 - U+20C7
|
||||
0x88888888, // U+20C8 - U+20CF
|
||||
0x88888888, // U+20D0 - U+20D7
|
||||
0x88888888, // U+20D8 - U+20DF
|
||||
0x88888888, // U+20E0 - U+20E7
|
||||
0x88888888, // U+20E8 - U+20EF
|
||||
0x88888888, // U+20F0 - U+20F7
|
||||
0x88888888, // U+20F8 - U+20FF
|
||||
0xB5555555, // U+2000 - U+2007
|
||||
0x77775555, // U+2008 - U+200F
|
||||
0x777211B1, // U+2010 - U+2017
|
||||
0x77777777, // U+2018 - U+201F
|
||||
0xA2227777, // U+2020 - U+2027
|
||||
0xB7777777, // U+2028 - U+202F
|
||||
0x77744444, // U+2030 - U+2037
|
||||
0x7A115107, // U+2038 - U+203F
|
||||
0x11017777, // U+2040 - U+2047
|
||||
0x77777711, // U+2048 - U+204F
|
||||
0x77777777, // U+2050 - U+2057
|
||||
0x77777777, // U+2058 - U+205F
|
||||
0x77777777, // U+2060 - U+2067
|
||||
0x77777777, // U+2068 - U+206F
|
||||
0x77777777, // U+2070 - U+2077
|
||||
0x77777777, // U+2078 - U+207F
|
||||
0x77777777, // U+2080 - U+2087
|
||||
0x77777777, // U+2088 - U+208F
|
||||
0x77777777, // U+2090 - U+2097
|
||||
0x77777777, // U+2098 - U+209F
|
||||
0x77777777, // U+20A0 - U+20A7
|
||||
0x77777777, // U+20A8 - U+20AF
|
||||
0x77777777, // U+20B0 - U+20B7
|
||||
0x77777777, // U+20B8 - U+20BF
|
||||
0x77777777, // U+20C0 - U+20C7
|
||||
0x77777777, // U+20C8 - U+20CF
|
||||
0x77777777, // U+20D0 - U+20D7
|
||||
0x77777777, // U+20D8 - U+20DF
|
||||
0x77777777, // U+20E0 - U+20E7
|
||||
0x77777777, // U+20E8 - U+20EF
|
||||
0x77777777, // U+20F0 - U+20F7
|
||||
0x77777777, // U+20F8 - U+20FF
|
||||
};
|
||||
|
||||
static const PRUint32 gLBClass21[32] = {
|
||||
0x88888888, // U+2100 - U+2107
|
||||
0x88888888, // U+2108 - U+210F
|
||||
0x83888888, // U+2110 - U+2117
|
||||
0x88888888, // U+2118 - U+211F
|
||||
0x87888888, // U+2120 - U+2127
|
||||
0x88888888, // U+2128 - U+212F
|
||||
0x88888888, // U+2130 - U+2137
|
||||
0x88888888, // U+2138 - U+213F
|
||||
0x88888888, // U+2140 - U+2147
|
||||
0x88888888, // U+2148 - U+214F
|
||||
0x88888888, // U+2150 - U+2157
|
||||
0x88888888, // U+2158 - U+215F
|
||||
0x77777777, // U+2100 - U+2107
|
||||
0x77777777, // U+2108 - U+210F
|
||||
0x73777777, // U+2110 - U+2117
|
||||
0x77777777, // U+2118 - U+211F
|
||||
0x77777777, // U+2120 - U+2127
|
||||
0x77777777, // U+2128 - U+212F
|
||||
0x77777777, // U+2130 - U+2137
|
||||
0x77777777, // U+2138 - U+213F
|
||||
0x77777777, // U+2140 - U+2147
|
||||
0x77777777, // U+2148 - U+214F
|
||||
0x77777777, // U+2150 - U+2157
|
||||
0x77777777, // U+2158 - U+215F
|
||||
0x55555555, // U+2160 - U+2167
|
||||
0x55555555, // U+2168 - U+216F
|
||||
0x55555555, // U+2170 - U+2177
|
||||
0x55555555, // U+2178 - U+217F
|
||||
0x88888888, // U+2180 - U+2187
|
||||
0x88888888, // U+2188 - U+218F
|
||||
0x88888888, // U+2190 - U+2197
|
||||
0x88888888, // U+2198 - U+219F
|
||||
0x88888888, // U+21A0 - U+21A7
|
||||
0x88888888, // U+21A8 - U+21AF
|
||||
0x88888888, // U+21B0 - U+21B7
|
||||
0x88888888, // U+21B8 - U+21BF
|
||||
0x88888888, // U+21C0 - U+21C7
|
||||
0x88888888, // U+21C8 - U+21CF
|
||||
0x88888888, // U+21D0 - U+21D7
|
||||
0x88888888, // U+21D8 - U+21DF
|
||||
0x88888888, // U+21E0 - U+21E7
|
||||
0x88888888, // U+21E8 - U+21EF
|
||||
0x88888888, // U+21F0 - U+21F7
|
||||
0x88888888, // U+21F8 - U+21FF
|
||||
0x77777777, // U+2180 - U+2187
|
||||
0x77777777, // U+2188 - U+218F
|
||||
0x77777777, // U+2190 - U+2197
|
||||
0x77777777, // U+2198 - U+219F
|
||||
0x77777777, // U+21A0 - U+21A7
|
||||
0x77777777, // U+21A8 - U+21AF
|
||||
0x77777777, // U+21B0 - U+21B7
|
||||
0x77777777, // U+21B8 - U+21BF
|
||||
0x77777777, // U+21C0 - U+21C7
|
||||
0x77777777, // U+21C8 - U+21CF
|
||||
0x77777777, // U+21D0 - U+21D7
|
||||
0x77777777, // U+21D8 - U+21DF
|
||||
0x77777777, // U+21E0 - U+21E7
|
||||
0x77777777, // U+21E8 - U+21EF
|
||||
0x77777777, // U+21F0 - U+21F7
|
||||
0x77777777, // U+21F8 - U+21FF
|
||||
};
|
||||
|
||||
static const PRUint32 gLBClass30[32] = {
|
||||
|
@ -179,37 +179,37 @@ static const PRUint32 gLBClass30[32] = {
|
|||
};
|
||||
|
||||
static const PRUint32 gLBClass0E[32] = {
|
||||
0x99999999, // U+0E00 - U+0E07
|
||||
0x99999999, // U+0E08 - U+0E0F
|
||||
0x99999999, // U+0E10 - U+0E17
|
||||
0x99999999, // U+0E18 - U+0E1F
|
||||
0x99999999, // U+0E20 - U+0E27
|
||||
0x19999999, // U+0E28 - U+0E2F
|
||||
0x99999999, // U+0E30 - U+0E37
|
||||
0x09999999, // U+0E38 - U+0E3F
|
||||
0x91999999, // U+0E40 - U+0E47
|
||||
0x89999999, // U+0E48 - U+0E4F
|
||||
0x88888888, // U+0E00 - U+0E07
|
||||
0x88888888, // U+0E08 - U+0E0F
|
||||
0x88888888, // U+0E10 - U+0E17
|
||||
0x88888888, // U+0E18 - U+0E1F
|
||||
0x88888888, // U+0E20 - U+0E27
|
||||
0x18888888, // U+0E28 - U+0E2F
|
||||
0x88888888, // U+0E30 - U+0E37
|
||||
0x08888888, // U+0E38 - U+0E3F
|
||||
0x81888888, // U+0E40 - U+0E47
|
||||
0x78888888, // U+0E48 - U+0E4F
|
||||
0x66666666, // U+0E50 - U+0E57
|
||||
0x99991166, // U+0E58 - U+0E5F
|
||||
0x99999999, // U+0E60 - U+0E67
|
||||
0x99999999, // U+0E68 - U+0E6F
|
||||
0x99999999, // U+0E70 - U+0E77
|
||||
0x99999999, // U+0E78 - U+0E7F
|
||||
0x99999999, // U+0E80 - U+0E87
|
||||
0x99999999, // U+0E88 - U+0E8F
|
||||
0x99999999, // U+0E90 - U+0E97
|
||||
0x99999999, // U+0E98 - U+0E9F
|
||||
0x99999999, // U+0EA0 - U+0EA7
|
||||
0x19999999, // U+0EA8 - U+0EAF
|
||||
0x99999999, // U+0EB0 - U+0EB7
|
||||
0x99999999, // U+0EB8 - U+0EBF
|
||||
0x91999999, // U+0EC0 - U+0EC7
|
||||
0x99999999, // U+0EC8 - U+0ECF
|
||||
0x88881166, // U+0E58 - U+0E5F
|
||||
0x88888888, // U+0E60 - U+0E67
|
||||
0x88888888, // U+0E68 - U+0E6F
|
||||
0x88888888, // U+0E70 - U+0E77
|
||||
0x88888888, // U+0E78 - U+0E7F
|
||||
0x88888888, // U+0E80 - U+0E87
|
||||
0x88888888, // U+0E88 - U+0E8F
|
||||
0x88888888, // U+0E90 - U+0E97
|
||||
0x88888888, // U+0E98 - U+0E9F
|
||||
0x88888888, // U+0EA0 - U+0EA7
|
||||
0x18888888, // U+0EA8 - U+0EAF
|
||||
0x88888888, // U+0EB0 - U+0EB7
|
||||
0x88888888, // U+0EB8 - U+0EBF
|
||||
0x81888888, // U+0EC0 - U+0EC7
|
||||
0x88888888, // U+0EC8 - U+0ECF
|
||||
0x66666666, // U+0ED0 - U+0ED7
|
||||
0x99999966, // U+0ED8 - U+0EDF
|
||||
0x99999999, // U+0EE0 - U+0EE7
|
||||
0x99999999, // U+0EE8 - U+0EEF
|
||||
0x99999999, // U+0EF0 - U+0EF7
|
||||
0x99999999, // U+0EF8 - U+0EFF
|
||||
0x88888866, // U+0ED8 - U+0EDF
|
||||
0x88888888, // U+0EE0 - U+0EE7
|
||||
0x88888888, // U+0EE8 - U+0EEF
|
||||
0x88888888, // U+0EF0 - U+0EF7
|
||||
0x88888888, // U+0EF8 - U+0EFF
|
||||
};
|
||||
|
||||
|
|
|
@ -66,78 +66,97 @@
|
|||
4 X X X X X X
|
||||
5 X X X X X X
|
||||
6 X X X X X X
|
||||
7 X X X X X X X
|
||||
8 X X X X X X E
|
||||
7 X X X X X X X
|
||||
8 X X X X X X E
|
||||
9 X X X X X X
|
||||
10 X X X X X X
|
||||
11 X X X X X X
|
||||
12 X X X X X X
|
||||
12 X X X X X X
|
||||
13 X X X X X X X
|
||||
14 X X X X X X X
|
||||
15 X X X X X X X X X
|
||||
15 X X X X X X X X X
|
||||
16 X X X X X X X X
|
||||
17 X X X X X E
|
||||
18 X X X X X X X X X
|
||||
17 X X X X X E
|
||||
18 X X X X X X X X X
|
||||
19 X E E E E E X X X X X X X X X X X X E X E E
|
||||
20 X X X X X E
|
||||
|
||||
* Same Char
|
||||
# Other Char
|
||||
|
||||
|
||||
X Cannot Break
|
||||
|
||||
The classes mean:
|
||||
1: Open parenthesis
|
||||
2: Close parenthesis
|
||||
3: Prohibit a line break before
|
||||
4: Punctuation for sentence end (except Full stop, e.g., "!" and "?")
|
||||
5: Middle dot (e.g., U+30FB KATAKANA MIDDLE DOT)
|
||||
6: Full stop
|
||||
7: Non-breakable between same characters
|
||||
8: Prefix (e.g., "$", "NO.")
|
||||
9: Postfix (e.g., "%")
|
||||
10: Ideographic space
|
||||
11: Hiragana
|
||||
12: Japanese characters (except class 11)
|
||||
13: Subscript
|
||||
14: Ruby
|
||||
15: Numeric
|
||||
16: Alphabet
|
||||
17: Space for Western language
|
||||
18: Western characters (except class 17)
|
||||
19: Split line note (Warichu) begin quote
|
||||
20: Split line note (Warichu) end quote
|
||||
|
||||
2. Simplify by removing the classes which we do not care about
|
||||
|
||||
However, since we do not care about class 13(Subscript), 14(Ruby),
|
||||
19(split line note begin quote), and 20(split line note end quote)
|
||||
we can simplify this par table into the following
|
||||
However, since we do not care about class 13(Subscript), 14(Ruby),
|
||||
16 (Alphabet), 19(split line note begin quote), and 20(split line note end
|
||||
quote) we can simplify this pair table into the following
|
||||
|
||||
Class of
|
||||
Leading Class of Trailing Char Class
|
||||
Char
|
||||
Char
|
||||
|
||||
1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18
|
||||
|
||||
1 X X X X X X X X X X X X X X X X
|
||||
2 X X X X X
|
||||
3 X X X X X
|
||||
1 2 3 4 5 6 7 8 9 10 11 12 15 17 18
|
||||
|
||||
1 X X X X X X X X X X X X X X X
|
||||
2 X X X X X
|
||||
3 X X X X X
|
||||
4 X X X X X
|
||||
5 X X X X X
|
||||
6 X X X X X
|
||||
7 X X X X X X
|
||||
8 X X X X X X
|
||||
9 X X X X X
|
||||
10 X X X X X
|
||||
11 X X X X X
|
||||
12 X X X X X
|
||||
15 X X X X X X X X
|
||||
16 X X X X X X X
|
||||
17 X X X X X
|
||||
18 X X X X X X X X
|
||||
5 X X X X X
|
||||
6 X X X X X
|
||||
7 X X X X X X
|
||||
8 X X X X X X
|
||||
9 X X X X X
|
||||
10 X X X X X
|
||||
11 X X X X X
|
||||
12 X X X X X
|
||||
15 X X X X X X X X
|
||||
17 X X X X X
|
||||
18 X X X X X X X
|
||||
|
||||
3. Simplify by merging classes
|
||||
|
||||
After the 2 simplification, the pair table have some duplication
|
||||
After simplification 2, the pair table has some duplication:
|
||||
a. classes 2, 3, 4, 5 and 6 are the same - we can merge them
|
||||
b. classes 10, 11, 12 and 17 are the same - we can merge them
|
||||
|
||||
|
||||
Class of
|
||||
Leading Class of Trailing Char Class
|
||||
Char
|
||||
Char
|
||||
|
||||
1 [a] 7 8 9 [b]15 16 18
|
||||
|
||||
1 X X X X X X X X X
|
||||
[a] X
|
||||
7 X X
|
||||
8 X X
|
||||
9 X
|
||||
[b] X
|
||||
15 X X X X
|
||||
16 X X X
|
||||
18 X X X X
|
||||
1 [a] 7 8 9 [b]15 18
|
||||
|
||||
1 X X X X X X X X
|
||||
[a] X
|
||||
7 X X
|
||||
8 X X
|
||||
9 X
|
||||
[b] X
|
||||
15 X X X X
|
||||
18 X X X
|
||||
|
||||
|
||||
4. We add COMPLEX characters and make them breakable with all other classes
|
||||
|
@ -145,41 +164,173 @@
|
|||
|
||||
Class of
|
||||
Leading Class of Trailing Char Class
|
||||
Char
|
||||
Char
|
||||
|
||||
1 [a] 7 8 9 [b]15 18 COMPLEX
|
||||
|
||||
1 X X X X X X X X X
|
||||
[a] X
|
||||
7 X X
|
||||
8 X X
|
||||
9 X
|
||||
[b] X
|
||||
15 X X X X
|
||||
18 X X X
|
||||
COMPLEX X T
|
||||
|
||||
1 [a] 7 8 9 [b]15 16 18 COMPLEX
|
||||
|
||||
1 X X X X X X X X X X
|
||||
[a] X
|
||||
7 X X
|
||||
8 X X
|
||||
9 X
|
||||
[b] X
|
||||
15 X X X X
|
||||
16 X X X
|
||||
18 X X X X
|
||||
COMPLEX X T
|
||||
|
||||
T : need special handling
|
||||
|
||||
5. Now we use one bit to encode weather it is breakable, and use 2 bytes
|
||||
|
||||
5. However, we need two special classes for some punctuation marks and
|
||||
parentheses whose breaking rules are like those of the character class (18);
|
||||
see bug 389056. We also need punctuation-like characters that behave the same
|
||||
as class 18 but are not letters in any language (e.g., '_').
|
||||
[c]. Based on open parenthesis class (1), but it is not breakable after
|
||||
character class (18) or numeric class (15).
|
||||
[d]. Based on close parenthesis (or punctuation) class (2), but it is not
|
||||
breakable before character class (18) or numeric class (15).
|
||||
|
||||
Class of
|
||||
Leading Class of Trailing Char Class
|
||||
Char
|
||||
|
||||
1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d]
|
||||
|
||||
1 X X X X X X X X X X X
|
||||
[a] X X X
|
||||
7 X X
|
||||
8 X X
|
||||
9 X
|
||||
[b] X X
|
||||
15 X X X X X X
|
||||
18 X X X X X
|
||||
COMPLEX X T
|
||||
[c] X X X X X X X X X X X
|
||||
[d] X X X X
|
||||
|
||||
|
||||
6. Unicode also has "NON-BREAK" characters. Lines should not be broken around
|
||||
them. JIS X 4051 has no such class, therefore we create [e].
|
||||
|
||||
Class of
|
||||
Leading Class of Trailing Char Class
|
||||
Char
|
||||
|
||||
1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d] [e]
|
||||
|
||||
1 X X X X X X X X X X X X
|
||||
[a] X X X X
|
||||
7 X X X
|
||||
8 X X X
|
||||
9 X X
|
||||
[b] X X X
|
||||
15 X X X X X X X
|
||||
18 X X X X X X
|
||||
COMPLEX X T X
|
||||
[c] X X X X X X X X X X X X
|
||||
[d] X X X X X
|
||||
[e] X X X X X X X X X X X X
|
||||
|
||||
|
||||
7. Now we use one bit to encode whether it is breakable, and use 2 bytes
|
||||
for one row, then the bit table will look like:
|
||||
|
||||
18 <- 1
|
||||
|
||||
1 0000 0011 1111 1111 = 0x03FF
|
||||
[a] 0000 0000 0000 0010 = 0x0002
|
||||
7 0000 0000 0000 0110 = 0x0006
|
||||
8 0000 0000 0100 0010 = 0x0042
|
||||
9 0000 0000 0000 0010 = 0x0002
|
||||
[b] 0000 0000 0000 0010 = 0x0002
|
||||
15 0000 0001 0101 0010 = 0x0152
|
||||
16 0000 0001 1000 0010 = 0x0182
|
||||
18 0000 0001 1100 0010 = 0x01C2
|
||||
COMPLEX 0000 0010 0000 0010 = 0x0202
|
||||
|
||||
5. Now we map the class to number
|
||||
|
||||
1 0000 1111 1111 1111 = 0x0FFF
|
||||
[a] 0000 1110 0000 0010 = 0x0E02
|
||||
7 0000 1000 0000 0110 = 0x0806
|
||||
8 0000 1000 0100 0010 = 0x0842
|
||||
9 0000 1000 0000 0010 = 0x0802
|
||||
[b] 0000 1100 0000 0010 = 0x0C02
|
||||
15 0000 1110 1101 0010 = 0x0ED2
|
||||
18 0000 1110 1100 0010 = 0x0EC2
|
||||
COMPLEX 0000 1001 0000 0010 = 0x0902
|
||||
[c] 0000 1111 1111 1111 = 0x0FFF
|
||||
[d] 0000 1100 1100 0010 = 0x0CC2
|
||||
[e] 0000 1111 1111 1111 = 0x0FFF
|
||||
*/
|
||||
|
||||
#define MAX_CLASSES 12
|
||||
|
||||
// Pair table for the normal breaking rules.  A row is selected by the class
// of the leading character; bit N of the row corresponds to a trailing
// character of class N (CLASS_OPEN is bit 0).  A set bit is an "X" (cannot
// break) cell of the table in the comment above.
static const PRUint16 gPair[MAX_CLASSES] = {
  0x0FFF,  // 0: CLASS_OPEN
  0x0E02,  // 1: CLASS_CLOSE
  0x0806,  // 2: CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS
  0x0842,  // 3: CLASS_PREFIX
  0x0802,  // 4: CLASS_POSTFFIX
  0x0C02,  // 5: CLASS_BREAKABLE
  0x0ED2,  // 6: CLASS_NUMERIC
  0x0EC2,  // 7: CLASS_CHARACTER
  0x0902,  // 8: CLASS_COMPLEX
  0x0FFF,  // 9: CLASS_OPEN_LIKE_CHARACTER
  0x0CC2,  // A: CLASS_CLOSE_LIKE_CHARACTER
  0x0FFF   // B: CLASS_NON_BREAKABLE
};
|
||||
|
||||
|
||||
/*
|
||||
|
||||
8. If the character is not far enough from the word start, the word end or
|
||||
another break point, we should not break in non-CJK languages.
|
||||
I.e., don't break around 15, 18, [c] and [d], but leave the rules unchanged
|
||||
where they are paired with [b].
|
||||
|
||||
Class of
|
||||
Leading Class of Trailing Char Class
|
||||
Char
|
||||
|
||||
1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d] [e]
|
||||
|
||||
1 X X X X X X X X X X X X
|
||||
[a] X X X X X X
|
||||
7 X X X X X X X
|
||||
8 X X X X X X
|
||||
9 X X X X X X
|
||||
[b] X X X
|
||||
15 X X X X X X X X X X X
|
||||
18 X X X X X X X X X X X
|
||||
COMPLEX X X X T X X X
|
||||
[c] X X X X X X X X X X X X
|
||||
[d] X X X X X X X X X X X
|
||||
[e] X X X X X X X X X X X X
|
||||
|
||||
18 <- 1
|
||||
|
||||
1 0000 1111 1111 1111 = 0x0FFF
|
||||
[a] 0000 1110 1100 0010 = 0x0EC2
|
||||
7 0000 1110 1100 0110 = 0x0EC6
|
||||
8 0000 1110 1100 0010 = 0x0EC2
|
||||
9 0000 1110 1100 0010 = 0x0EC2
|
||||
[b] 0000 1100 0000 0010 = 0x0C02
|
||||
15 0000 1111 1101 1111 = 0x0FDF
|
||||
18 0000 1111 1101 1111 = 0x0FDF
|
||||
COMPLEX 0000 1111 1100 0010 = 0x0FC2
|
||||
[c] 0000 1111 1111 1111 = 0x0FFF
|
||||
[d] 0000 1111 1101 1111 = 0x0FDF
|
||||
[e] 0000 1111 1111 1111 = 0x0FFF
|
||||
*/
|
||||
|
||||
// Pair table for the conservative breaking rules (used near a word start,
// a word end or the previous break in non-CJK text).  A row is selected by
// the class of the leading character; bit N of the row corresponds to a
// trailing character of class N (CLASS_OPEN is bit 0).  A set bit is an "X"
// (cannot break) cell of the table in the comment above.
static const PRUint16 gPairConservative[MAX_CLASSES] = {
  0x0FFF,  // 0: CLASS_OPEN
  0x0EC2,  // 1: CLASS_CLOSE
  0x0EC6,  // 2: CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS
  0x0EC2,  // 3: CLASS_PREFIX
  0x0EC2,  // 4: CLASS_POSTFFIX
  0x0C02,  // 5: CLASS_BREAKABLE
  0x0FDF,  // 6: CLASS_NUMERIC
  0x0FDF,  // 7: CLASS_CHARACTER
  0x0FC2,  // 8: CLASS_COMPLEX
  0x0FFF,  // 9: CLASS_OPEN_LIKE_CHARACTER
  // Row [d] is 0000 1111 1101 1111: every class except [b] (bit 5) is
  // non-breakable after it.  The previous value 0x0EDF left bit 8 (COMPLEX)
  // clear, which disagreed with the documented bit pattern and allowed a
  // break between [d] and COMPLEX although COMPLEX -> [d] is blocked.
  0x0FDF,  // A: CLASS_CLOSE_LIKE_CHARACTER
  0x0FFF   // B: CLASS_NON_BREAKABLE
};
|
||||
|
||||
|
||||
/*
|
||||
|
||||
9. Now we map the class to number
|
||||
|
||||
0: 1
|
||||
1: [a]- 2, 3, 4, 5, 6
|
||||
2: 7
|
||||
|
@ -187,27 +338,59 @@
|
|||
4: 9
|
||||
5: [b]- 10, 11, 12, 17
|
||||
6: 15
|
||||
7: 16
|
||||
8: 18
|
||||
9: COMPLEX
|
||||
7: 18
|
||||
8: COMPLEX
|
||||
9: [c]
|
||||
A: [d]
|
||||
B: [e]
|
||||
|
||||
and they mean:
|
||||
0: Open parenthesis
|
||||
1: Punctuation that prohibits break before
|
||||
2: Non-breakable between same classes
|
||||
3: Prefix
|
||||
4: Postfix
|
||||
5: Breakable character (Spaces and Most Japanese characters)
|
||||
6: Numeric
|
||||
7: Characters
|
||||
8: Need special handling characters (E.g., Thai)
|
||||
9: Open parentheses like Character (See bug 389056)
|
||||
A: Close-parenthesis-like (or punctuation-like) character (See bug 389056)
|
||||
B: Non breakable (See bug 390920)
|
||||
|
||||
*/
|
||||
|
||||
#define MAX_CLASSES 10
|
||||
#define CLASS_NONE PR_INT8_MAX
|
||||
|
||||
static const PRUint16 gPair[MAX_CLASSES] = {
|
||||
0x03FF,
|
||||
0x0002,
|
||||
0x0006,
|
||||
0x0042,
|
||||
0x0002,
|
||||
0x0002,
|
||||
0x0152,
|
||||
0x0182,
|
||||
0x01C2,
|
||||
0x0202
|
||||
};
|
||||
#define CLASS_OPEN 0x00
|
||||
#define CLASS_CLOSE 0x01
|
||||
#define CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS 0x02
|
||||
#define CLASS_PREFIX 0x03
|
||||
#define CLASS_POSTFFIX 0x04
|
||||
#define CLASS_BREAKABLE 0x05
|
||||
#define CLASS_NUMERIC 0x06
|
||||
#define CLASS_CHARACTER 0x07
|
||||
#define CLASS_COMPLEX 0x08
|
||||
#define CLASS_OPEN_LIKE_CHARACTER 0x09
|
||||
#define CLASS_CLOSE_LIKE_CHARACTER 0x0A
|
||||
#define CLASS_NON_BREAKABLE 0x0B
|
||||
|
||||
#define U_NULL PRUnichar(0x0000)
|
||||
#define U_SLASH PRUnichar('/')
|
||||
#define U_SPACE PRUnichar(' ')
|
||||
#define U_HYPHEN PRUnichar('-')
|
||||
#define U_EQUAL PRUnichar('=')
|
||||
#define U_PERCENT PRUnichar('%')
|
||||
#define U_AMPERSAND PRUnichar('&')
|
||||
#define U_BACKSLASH PRUnichar('\\')
|
||||
|
||||
#define NEED_CONTEXTUAL_ANALYSIS(c) (IS_HYPHEN(c) || \
|
||||
(c) == U_SLASH || \
|
||||
(c) == U_PERCENT || \
|
||||
(c) == U_AMPERSAND || \
|
||||
(c) == U_BACKSLASH)
|
||||
|
||||
#define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)
|
||||
|
||||
static inline int
|
||||
GETCLASSFROMTABLE(const PRUint32* t, PRUint16 l)
|
||||
|
@ -215,10 +398,6 @@ GETCLASSFROMTABLE(const PRUint32* t, PRUint16 l)
|
|||
return ((((t)[(l>>3)]) >> ((l & 0x0007)<<2)) & 0x000f);
|
||||
}
|
||||
|
||||
#define CLASS_COMPLEX 9
|
||||
|
||||
|
||||
|
||||
static inline int
|
||||
IS_HALFWIDTH_IN_JISx4051_CLASS3(PRUnichar u)
|
||||
{
|
||||
|
@ -240,54 +419,48 @@ IS_COMPLEX(PRUnichar u)
|
|||
return (0x0e01 <= (u) && (u) <= 0x0e5b);
|
||||
}
|
||||
|
||||
static inline int
|
||||
IS_SPACE(PRUnichar u)
|
||||
static inline PRBool
|
||||
IS_NONBREAKABLE_SPACE(PRUnichar u)
|
||||
{
|
||||
return ((u) == 0x0020 || (u) == 0x0009 || (u) == 0x000a || (u) == 0x000d || (u)==0x200b);
|
||||
return u == 0x00A0 || u == 0x2007; // NO-BREAK SPACE, FIGURE SPACE
|
||||
}
|
||||
|
||||
static PRInt8 GetClass(PRUnichar u)
|
||||
// Returns PR_TRUE when |u| is one of the hyphen characters recognised here:
// U+002D HYPHEN-MINUS, U+058A ARMENIAN HYPHEN, U+2010 HYPHEN or
// U+2012 FIGURE DASH.
static inline PRBool
IS_HYPHEN(PRUnichar u)
{
  switch (u) {
    case U_HYPHEN: // HYPHEN-MINUS
    case 0x058A:   // ARMENIAN HYPHEN
    case 0x2010:   // HYPHEN
    case 0x2012:   // FIGURE DASH
      return PR_TRUE;
    default:
      return PR_FALSE;
  }
}
|
||||
|
||||
static PRInt8
|
||||
GetClass(PRUnichar u)
|
||||
{
|
||||
PRUint16 h = u & 0xFF00;
|
||||
PRUint16 l = u & 0x00ff;
|
||||
PRInt8 c;
|
||||
|
||||
|
||||
// Handle 3 range table first
|
||||
if( 0x0000 == h)
|
||||
{
|
||||
if (0x0000 == h) {
|
||||
c = GETCLASSFROMTABLE(gLBClass00, l);
|
||||
}
|
||||
else if( 0x0E00 == h)
|
||||
{
|
||||
} else if (0x0E00 == h) {
|
||||
c = GETCLASSFROMTABLE(gLBClass0E, l);
|
||||
}
|
||||
else if( 0x2000 == h)
|
||||
{
|
||||
} else if (0x2000 == h) {
|
||||
c = GETCLASSFROMTABLE(gLBClass20, l);
|
||||
}
|
||||
else if( 0x2100 == h)
|
||||
{
|
||||
} else if (0x2100 == h) {
|
||||
c = GETCLASSFROMTABLE(gLBClass21, l);
|
||||
}
|
||||
else if( 0x3000 == h)
|
||||
{
|
||||
} else if (0x3000 == h) {
|
||||
c = GETCLASSFROMTABLE(gLBClass30, l);
|
||||
}
|
||||
else if ( ( ( 0x3200 <= u) && ( u <= 0xA4CF) ) || // CJK and Yi
|
||||
( ( 0xAC00 <= h) && ( h <= 0xD7FF) ) || // Hangul
|
||||
( ( 0xf900 <= h) && ( h <= 0xfaff) )
|
||||
)
|
||||
{
|
||||
c = 5; // CJK character, Han, and Han Compatability
|
||||
}
|
||||
else if( 0xff00 == h)
|
||||
{
|
||||
if( l < 0x0060) // Fullwidth ASCII variant
|
||||
{
|
||||
} else if (((0x3200 <= u) && (u <= 0xA4CF)) || // CJK and Yi
|
||||
((0xAC00 <= h) && (h <= 0xD7FF)) || // Hangul
|
||||
((0xf900 <= h) && (h <= 0xfaff))) {
|
||||
c = CLASS_BREAKABLE; // CJK character, Han, and Han Compatability
|
||||
} else if (0xff00 == h) {
|
||||
if (l < 0x0060) { // Fullwidth ASCII variant
|
||||
c = GETCLASSFROMTABLE(gLBClass00, (l+0x20));
|
||||
} else if (l < 0x00a0) {
|
||||
switch (l)
|
||||
{
|
||||
switch (l) {
|
||||
case 0x61: c = GetClass(0x3002); break;
|
||||
case 0x62: c = GetClass(0x300c); break;
|
||||
case 0x63: c = GetClass(0x300d); break;
|
||||
|
@ -296,53 +469,77 @@ static PRInt8 GetClass(PRUnichar u)
|
|||
case 0x9e: c = GetClass(0x309b); break;
|
||||
case 0x9f: c = GetClass(0x309c); break;
|
||||
default:
|
||||
if(IS_HALFWIDTH_IN_JISx4051_CLASS3(u))
|
||||
c = 1; // jis x4051 class 3
|
||||
if (IS_HALFWIDTH_IN_JISx4051_CLASS3(u))
|
||||
c = CLASS_CLOSE; // jis x4051 class 3
|
||||
else
|
||||
c = 5; // jis x4051 class 11
|
||||
c = CLASS_BREAKABLE; // jis x4051 class 11
|
||||
break;
|
||||
}
|
||||
// Halfwidth Katakana variants
|
||||
} else if( l < 0x00e0) {
|
||||
c = 8; // Halfwidth Hangul variants
|
||||
} else if( l < 0x00f0) {
|
||||
static PRUnichar NarrowFFEx[16] =
|
||||
{
|
||||
// Halfwidth Katakana variants
|
||||
} else if (l < 0x00e0) {
|
||||
c = CLASS_CHARACTER; // Halfwidth Hangul variants
|
||||
} else if (l < 0x00f0) {
|
||||
static PRUnichar NarrowFFEx[16] = {
|
||||
0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
|
||||
0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000
|
||||
};
|
||||
c = GetClass(NarrowFFEx[l - 0x00e0]);
|
||||
} else {
|
||||
c = 8;
|
||||
c = CLASS_CHARACTER;
|
||||
}
|
||||
}
|
||||
else if( 0x3100 == h) {
|
||||
if ( l <= 0xbf) { // Hangul Compatibility Jamo, Bopomofo, Kanbun
|
||||
// XXX: This is per UAX #14, but UAX #14 may change
|
||||
// the line breaking rules about Kanbun and Bopomofo.
|
||||
c = 5;
|
||||
} else if (0x3100 == h) {
|
||||
if (l <= 0xbf) { // Hangul Compatibility Jamo, Bopomofo, Kanbun
|
||||
// XXX: This is per UAX #14, but UAX #14 may change
|
||||
// the line breaking rules about Kanbun and Bopomofo.
|
||||
c = CLASS_BREAKABLE;
|
||||
} else if (l >= 0xf0) { // Katakana small letters for Ainu
|
||||
c = CLASS_CLOSE;
|
||||
} else { // unassigned
|
||||
c = CLASS_CHARACTER;
|
||||
}
|
||||
else if ( l >= 0xf0)
|
||||
{ // Katakana small letters for Ainu
|
||||
c = 1;
|
||||
}
|
||||
else // unassigned
|
||||
{
|
||||
c = 8;
|
||||
}
|
||||
}
|
||||
else {
|
||||
c = 8; // others
|
||||
} else if (0x0300 == h) {
|
||||
if (0x4F == l || (0x5C <= l && l <= 0x62))
|
||||
c = CLASS_NON_BREAKABLE;
|
||||
else
|
||||
c = CLASS_CHARACTER;
|
||||
} else if (0x0500 == h) {
|
||||
// ARMENIAN HYPHEN (for "Breaking Hyphens" of UAX#14)
|
||||
if (l == 0x8A)
|
||||
c = GETCLASSFROMTABLE(gLBClass00, PRUint16(U_HYPHEN));
|
||||
else
|
||||
c = CLASS_CHARACTER;
|
||||
} else if (0x0F00 == h) {
|
||||
if (0x08 == l || 0x0C == l || 0x12 == l)
|
||||
c = CLASS_NON_BREAKABLE;
|
||||
else
|
||||
c = CLASS_CHARACTER;
|
||||
} else if (0x1800 == h) {
|
||||
if (0x0E == l)
|
||||
c = CLASS_NON_BREAKABLE;
|
||||
else
|
||||
c = CLASS_CHARACTER;
|
||||
} else {
|
||||
c = CLASS_CHARACTER; // others
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
static PRBool GetPair(PRInt8 c1, PRInt8 c2)
|
||||
static PRBool
|
||||
GetPair(PRInt8 c1, PRInt8 c2)
|
||||
{
|
||||
NS_ASSERTION( c1 < MAX_CLASSES ,"illegal classes 1");
|
||||
NS_ASSERTION( c2 < MAX_CLASSES ,"illegal classes 2");
|
||||
NS_ASSERTION(c1 < MAX_CLASSES ,"illegal classes 1");
|
||||
NS_ASSERTION(c2 < MAX_CLASSES ,"illegal classes 2");
|
||||
|
||||
return (0 == ((gPair[c1] >> c2 ) & 0x0001));
|
||||
return (0 == ((gPair[c1] >> c2) & 0x0001));
|
||||
}
|
||||
|
||||
// Looks up the conservative pair table: c1 is the class of the leading
// character, c2 the class of the trailing one.  Returns nonzero when the
// bit for (c1, c2) in gPairConservative is clear.
static PRBool
GetPairConservative(PRInt8 c1, PRInt8 c2)
{
  NS_ASSERTION(c1 < MAX_CLASSES ,"illegal classes 1");
  NS_ASSERTION(c2 < MAX_CLASSES ,"illegal classes 2");

  const PRUint16 row = gPairConservative[c1];
  const int bit = (row >> c2) & 0x0001;
  return !bit;
}
|
||||
|
||||
nsJISx4051LineBreaker::nsJISx4051LineBreaker()
|
||||
|
@ -355,81 +552,204 @@ nsJISx4051LineBreaker::~nsJISx4051LineBreaker()
|
|||
|
||||
NS_IMPL_ISUPPORTS1(nsJISx4051LineBreaker, nsILineBreaker)
|
||||
|
||||
#define U_PERIOD PRUnichar('.')
|
||||
#define U_COMMA PRUnichar(',')
|
||||
#define U_SEMICOLON PRUnichar(';')
|
||||
#define U_SLASH PRUnichar('/')
|
||||
#define U_SPACE PRUnichar(' ')
|
||||
#define U_HYPHEN PRUnichar('-')
|
||||
#define U_EQUAL PRUnichar('=')
|
||||
#define U_NULL PRUnichar(0x0000)
|
||||
#define U_RIGHT_SINGLE_QUOTATION_MARK PRUnichar(0x2019)
|
||||
#define NEED_CONTEXTUAL_ANALYSIS(c) ((c) == U_PERIOD || \
|
||||
(c) == U_COMMA || \
|
||||
(c) == U_SEMICOLON || \
|
||||
(c) == U_SLASH || \
|
||||
(c) == U_HYPHEN || \
|
||||
(c) == U_EQUAL || \
|
||||
(c) == U_RIGHT_SINGLE_QUOTATION_MARK)
|
||||
#define NUMERIC_CLASS 6 // JIS x4051 class 15 is now map to simplified class 6
|
||||
#define CHARACTER_CLASS 8 // JIS x4051 class 18 is now map to simplified class 8
|
||||
#define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)
|
||||
class ContextState {
|
||||
public:
|
||||
ContextState(const PRUnichar* aText, PRUint32 aLength) {
|
||||
mUniText = aText;
|
||||
mText = nsnull;
|
||||
mLength = aLength;
|
||||
Init();
|
||||
}
|
||||
|
||||
static PRInt8 ContextualAnalysis(
|
||||
PRUnichar prev, PRUnichar cur, PRUnichar next)
|
||||
ContextState(const PRUint8* aText, PRUint32 aLength) {
|
||||
mUniText = nsnull;
|
||||
mText = aText;
|
||||
mLength = aLength;
|
||||
Init();
|
||||
}
|
||||
|
||||
// Number of characters in the text being analysed.
PRUint32 Length()
{
  return mLength;
}
|
||||
// Current position, as last set by AdvanceIndexTo().
PRUint32 Index()
{
  return mIndex;
}
|
||||
|
||||
// Returns the character at aIndex, reading from whichever of the two text
// buffers is set; 8-bit characters are widened to PRUnichar.
PRUnichar GetCharAt(PRUint32 aIndex) {
  // aIndex is unsigned, so only the upper bound needs checking.
  NS_ASSERTION(aIndex < mLength, "Out of range!");
  if (mUniText)
    return mUniText[aIndex];
  return PRUnichar(mText[aIndex]);
}
|
||||
|
||||
// Moves the current position to aIndex.  The position may only move
// forward and must stay inside the text (both are asserted).
void AdvanceIndexTo(PRUint32 aIndex)
{
  NS_ASSERTION(mIndex <= aIndex, "the index cannot decrease.");
  NS_ASSERTION(aIndex < mLength, "out of range");

  mIndex = aIndex;
}
|
||||
|
||||
// Records the current position as the most recent break point.
void NotifyBreakBefore()
{
  mLastBreakIndex = mIndex;
}
|
||||
|
||||
// A word in a western language should not be broken. However, even if a word
|
||||
// has only ASCII characters, words that are not natural language, e.g., URLs
|
||||
// and file paths, should be breakable. To protect natural words, we use the
|
||||
// conservative breaking rules under the following conditions:
|
||||
// 1. near the start of the word
|
||||
// 2. near the end of the word
|
||||
// 3. near the latest break point
|
||||
// CONSERVATIVE_BREAK_RANGE defines 'near' in characters.
|
||||
#define CONSERVATIVE_BREAK_RANGE 6
|
||||
|
||||
// Decides whether the conservative pair table should be used at position
// mIndex + aOffset.
//
// Returns PR_FALSE whenever the text contains a CJK character.  Otherwise
// returns PR_TRUE when the position is fewer than CONSERVATIVE_BREAK_RANGE
// characters from the start of the text, from the end of the text or from
// the last recorded break, or when a non-breakable space lies fewer than
// CONSERVATIVE_BREAK_RANGE characters before or after the position.
PRBool UseConservativeBreaking(PRUint32 aOffset = 0) {
  if (mHasCJKChar)
    return PR_FALSE;

  const PRUint32 pos = mIndex + aOffset;
  if (pos < CONSERVATIVE_BREAK_RANGE ||
      mLength - pos < CONSERVATIVE_BREAK_RANGE ||
      pos - mLastBreakIndex < CONSERVATIVE_BREAK_RANGE)
    return PR_TRUE;

  if (!mHasNonbreakableSpace)
    return PR_FALSE;

  // The text has a non-breakable space somewhere; check whether one is
  // close to the position.

  // pos is at least CONSERVATIVE_BREAK_RANGE here, so pos - distance
  // cannot wrap around.
  for (PRUint32 distance = 1; distance < CONSERVATIVE_BREAK_RANGE;
       ++distance) {
    if (IS_NONBREAKABLE_SPACE(GetCharAt(pos - distance)))
      return PR_TRUE;
  }

  // mLength - pos is at least CONSERVATIVE_BREAK_RANGE here, so
  // pos + distance stays inside the text.
  for (PRUint32 distance = 1; distance < CONSERVATIVE_BREAK_RANGE;
       ++distance) {
    if (IS_NONBREAKABLE_SPACE(GetCharAt(pos + distance)))
      return PR_TRUE;
  }

  return PR_FALSE;
}
|
||||
|
||||
// Returns PR_TRUE if aCh occurs anywhere before the current index, i.e. at
// any position in [0, mIndex). Returns PR_FALSE when the current index is 0.
PRBool HasCharacterAlready(PRUnichar aCh) {
  // The index is unsigned, so the counter is kept one past the position
  // being examined: it runs from mIndex down to 1 and never wraps, while the
  // positions examined run from mIndex - 1 down to 0 inclusive. (The
  // previous "0 < i" loop stopped before position 0 and so never looked at
  // the first character of the text.)
  for (PRUint32 i = mIndex; i > 0; --i) {
    if (GetCharAt(i - 1) == aCh)
      return PR_TRUE;
  }
  return PR_FALSE;
}
|
||||
|
||||
// Returns the nearest character before the current index that is not a
// hyphen (per IS_HYPHEN), or U_NULL when every preceding character is a
// hyphen or the current index is 0. The current character is expected to be
// a hyphen; this is checked by assertion only.
PRUnichar GetPreviousNonHyphenCharacter() {
  NS_ASSERTION(IS_HYPHEN(GetCharAt(mIndex)),
               "current character isn't hyphen");
  // The index is unsigned, so the counter is kept one past the position
  // being examined; positions mIndex - 1 down to 0 inclusive are visited.
  // (The previous "0 < i" loop never examined position 0.)
  for (PRUint32 i = mIndex; i > 0; --i) {
    PRUnichar ch = GetCharAt(i - 1);
    if (!IS_HYPHEN(ch))
      return ch;
  }
  // Nothing but hyphens (or nothing at all) precedes the current position.
  return U_NULL;
}
|
||||
|
||||
private:
|
||||
void Init() {
|
||||
mIndex = 0;
|
||||
mLastBreakIndex = 0;
|
||||
mHasCJKChar = 0;
|
||||
mHasNonbreakableSpace = 0;
|
||||
|
||||
for (PRUint32 i = 0; i < mLength; ++i) {
|
||||
PRUnichar u = GetCharAt(i);
|
||||
if (!mHasNonbreakableSpace && IS_NONBREAKABLE_SPACE(u))
|
||||
mHasNonbreakableSpace = 1;
|
||||
else if (mUniText && !mHasCJKChar && IS_CJK_CHAR(u))
|
||||
mHasCJKChar = 1;
|
||||
}
|
||||
}
|
||||
|
||||
const PRUnichar* mUniText;
|
||||
const PRUint8* mText;
|
||||
|
||||
PRUint32 mIndex;
|
||||
PRUint32 mLength; // length of text
|
||||
PRUint32 mLastBreakIndex;
|
||||
PRPackedBool mHasCJKChar; // if the text has CJK character, this is true.
|
||||
PRPackedBool mHasNonbreakableSpace; // if the text has no-breakable space,
|
||||
// this is true.
|
||||
};
|
||||
|
||||
static PRInt8
|
||||
ContextualAnalysis(PRUnichar prev, PRUnichar cur, PRUnichar next,
|
||||
ContextState &aState)
|
||||
{
|
||||
if(U_COMMA == cur || U_SEMICOLON == cur)
|
||||
{
|
||||
if((IS_ASCII_DIGIT(prev) || prev == U_NULL) && IS_ASCII_DIGIT(next))
|
||||
return NUMERIC_CLASS;
|
||||
}
|
||||
else if(U_PERIOD == cur)
|
||||
{
|
||||
if((IS_ASCII_DIGIT(prev) || prev == U_SPACE || prev == U_NULL) &&
|
||||
IS_ASCII_DIGIT(next))
|
||||
return NUMERIC_CLASS;
|
||||
// Don't return CLASS_OPEN/CLASS_CLOSE if aState.UseJISX4051 is FALSE.
|
||||
|
||||
// By assigning a full stop character class only when it's followed by
|
||||
// class 6 (numeric), 7, and 8 (character). Note that class 9 (Thai)
|
||||
// doesn't matter, either way, we prevent lines from breaking around
|
||||
// full stop in those cases while still allowing it to end a line when
|
||||
// followed by CJK characters. With an additional condition of it being
|
||||
// preceded by class 0 or class > 5, we make sure that it does not
|
||||
// start a line (see bug 164759).
|
||||
PRUint8 pc = prev != U_NULL ? GetClass(prev) : CHARACTER_CLASS;
|
||||
if((pc > 5 || pc == 0) && GetClass(next) > 5)
|
||||
return CHARACTER_CLASS;
|
||||
}
|
||||
else if(U_SLASH == cur || U_HYPHEN == cur || U_EQUAL == cur)
|
||||
{
|
||||
// if slash is a first character, don't break at this point (e.g., "/root")
|
||||
if (U_SLASH == cur && prev == U_NULL)
|
||||
return CHARACTER_CLASS;
|
||||
if (IS_ASCII_DIGIT(next))
|
||||
return NUMERIC_CLASS;
|
||||
}
|
||||
else if(U_RIGHT_SINGLE_QUOTATION_MARK == cur)
|
||||
{
|
||||
// somehow people use this as ' in "it's" sometimes...
|
||||
if(U_SPACE != next)
|
||||
return CHARACTER_CLASS;
|
||||
}
|
||||
return GetClass(cur);
|
||||
if (IS_HYPHEN(cur)) {
|
||||
// If next character is hyphen, we don't need to break between them.
|
||||
if (IS_HYPHEN(next))
|
||||
return CLASS_CHARACTER;
|
||||
// If prev and next characters are numeric, it may be in Math context.
|
||||
// So, we should not break here.
|
||||
PRBool prevIsNum = IS_ASCII_DIGIT(prev);
|
||||
PRBool nextIsNum = IS_ASCII_DIGIT(next);
|
||||
if (prevIsNum && nextIsNum)
|
||||
return CLASS_NUMERIC;
|
||||
// If one side is numeric and the other is a character, or if both sides are
|
||||
// characters, the hyphen should be breakable.
|
||||
if (!aState.UseConservativeBreaking(1)) {
|
||||
PRUnichar prevOfHyphen = aState.GetPreviousNonHyphenCharacter();
|
||||
if (prevOfHyphen && next) {
|
||||
PRBool prevIsChar = !NEED_CONTEXTUAL_ANALYSIS(prevOfHyphen) &&
|
||||
GetClass(prevOfHyphen) == CLASS_CHARACTER;
|
||||
PRBool nextIsChar = !NEED_CONTEXTUAL_ANALYSIS(next) &&
|
||||
GetClass(next) == CLASS_CHARACTER;
|
||||
if ((prevIsNum || prevIsChar) && (nextIsNum || nextIsChar))
|
||||
return CLASS_CLOSE;
|
||||
}
|
||||
}
|
||||
} else if (cur == U_SLASH || cur == U_BACKSLASH) {
|
||||
// If this is immediately after same char, we should not break here.
|
||||
if (prev == cur)
|
||||
return CLASS_CHARACTER;
|
||||
// If this text has two or more (BACK)SLASHs, this may be file path or URL.
|
||||
if (!aState.UseConservativeBreaking() &&
|
||||
aState.HasCharacterAlready(cur))
|
||||
return CLASS_OPEN;
|
||||
} else if (cur == U_PERCENT) {
|
||||
// If this is a part of the param of URL, we should break before.
|
||||
if (!aState.UseConservativeBreaking()) {
|
||||
if (aState.Index() >= 3 &&
|
||||
aState.GetCharAt(aState.Index() - 3) == U_PERCENT)
|
||||
return CLASS_OPEN;
|
||||
if (aState.Index() + 3 < aState.Length() &&
|
||||
aState.GetCharAt(aState.Index() + 3) == U_PERCENT)
|
||||
return CLASS_OPEN;
|
||||
}
|
||||
} else if (cur == U_AMPERSAND) {
|
||||
// If this may be a separator of params of URL, we should break after.
|
||||
if (!aState.UseConservativeBreaking(1) &&
|
||||
aState.HasCharacterAlready(U_EQUAL))
|
||||
return CLASS_CLOSE;
|
||||
} else {
|
||||
NS_ERROR("Forgot to handle the current character!");
|
||||
}
|
||||
return GetClass(cur);
|
||||
}
|
||||
|
||||
|
||||
PRInt32 nsJISx4051LineBreaker::WordMove(
|
||||
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos, PRInt8 aDirection)
|
||||
PRInt32
|
||||
nsJISx4051LineBreaker::WordMove(const PRUnichar* aText, PRUint32 aLen,
|
||||
PRUint32 aPos, PRInt8 aDirection)
|
||||
{
|
||||
PRBool textNeedsJISx4051 = PR_FALSE;
|
||||
PRInt32 begin, end;
|
||||
|
||||
for (begin = aPos; begin > 0 && !IS_SPACE(aText[begin - 1]); --begin) {
|
||||
for (begin = aPos; begin > 0 && !NS_IsSpace(aText[begin - 1]); --begin) {
|
||||
if (IS_CJK_CHAR(aText[begin]) || IS_COMPLEX(aText[begin])) {
|
||||
textNeedsJISx4051 = PR_TRUE;
|
||||
}
|
||||
}
|
||||
for (end = aPos + 1; end < PRInt32(aLen) && !IS_SPACE(aText[end]); ++end) {
|
||||
for (end = aPos + 1; end < PRInt32(aLen) && !NS_IsSpace(aText[end]); ++end) {
|
||||
if (IS_CJK_CHAR(aText[end]) || IS_COMPLEX(aText[end])) {
|
||||
textNeedsJISx4051 = PR_TRUE;
|
||||
}
|
||||
|
@ -458,8 +778,9 @@ PRInt32 nsJISx4051LineBreaker::WordMove(
|
|||
return ret;
|
||||
}
|
||||
|
||||
PRInt32 nsJISx4051LineBreaker::Next(
|
||||
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
|
||||
PRInt32
|
||||
nsJISx4051LineBreaker::Next(const PRUnichar* aText, PRUint32 aLen,
|
||||
PRUint32 aPos)
|
||||
{
|
||||
NS_ASSERTION(aText, "aText shouldn't be null");
|
||||
NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
|
||||
|
@ -468,8 +789,9 @@ PRInt32 nsJISx4051LineBreaker::Next(
|
|||
return nextPos < PRInt32(aLen) ? nextPos : NS_LINEBREAKER_NEED_MORE_TEXT;
|
||||
}
|
||||
|
||||
PRInt32 nsJISx4051LineBreaker::Prev(
|
||||
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
|
||||
PRInt32
|
||||
nsJISx4051LineBreaker::Prev(const PRUnichar* aText, PRUint32 aLen,
|
||||
PRUint32 aPos)
|
||||
{
|
||||
NS_ASSERTION(aText, "aText shouldn't be null");
|
||||
NS_ASSERTION(aLen >= aPos, "Illegal value (length >= position)");
|
||||
|
@ -483,16 +805,19 @@ nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar* aChars, PRUint32 aLeng
|
|||
PRPackedBool* aBreakBefore)
|
||||
{
|
||||
PRUint32 cur;
|
||||
PRInt8 lastClass = -1;
|
||||
PRInt8 lastClass = CLASS_NONE;
|
||||
ContextState state(aChars, aLength);
|
||||
|
||||
for (cur = 0; cur < aLength; ++cur) {
|
||||
PRUnichar ch = aChars[cur];
|
||||
PRInt8 cl;
|
||||
state.AdvanceIndexTo(cur);
|
||||
|
||||
if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
|
||||
cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL,
|
||||
ch,
|
||||
cur + 1 < aLength ? aChars[cur + 1] : U_NULL);
|
||||
cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
|
||||
state);
|
||||
} else {
|
||||
cl = GetClass(ch);
|
||||
}
|
||||
|
@ -501,11 +826,16 @@ nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar* aChars, PRUint32 aLeng
|
|||
if (cur > 0) {
|
||||
NS_ASSERTION(CLASS_COMPLEX != lastClass || CLASS_COMPLEX != cl,
|
||||
"Loop should have prevented adjacent complex chars here");
|
||||
allowBreak = GetPair(lastClass, cl);
|
||||
if (state.UseConservativeBreaking())
|
||||
allowBreak = GetPairConservative(lastClass, cl);
|
||||
else
|
||||
allowBreak = GetPair(lastClass, cl);
|
||||
} else {
|
||||
allowBreak = PR_FALSE;
|
||||
}
|
||||
aBreakBefore[cur] = allowBreak;
|
||||
if (allowBreak)
|
||||
state.NotifyBreakBefore();
|
||||
lastClass = cl;
|
||||
if (CLASS_COMPLEX == cl) {
|
||||
PRUint32 end = cur + 1;
|
||||
|
@ -530,27 +860,35 @@ nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUint8* aChars, PRUint32 aLength
|
|||
PRPackedBool* aBreakBefore)
|
||||
{
|
||||
PRUint32 cur;
|
||||
PRInt8 lastClass = -1;
|
||||
PRInt8 lastClass = CLASS_NONE;
|
||||
ContextState state(aChars, aLength);
|
||||
|
||||
for (cur = 0; cur < aLength; ++cur) {
|
||||
PRUnichar ch = aChars[cur];
|
||||
PRInt8 cl;
|
||||
state.AdvanceIndexTo(cur);
|
||||
|
||||
if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
|
||||
cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL,
|
||||
ch,
|
||||
cur + 1 < aLength ? aChars[cur + 1] : U_NULL);
|
||||
cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
|
||||
state);
|
||||
} else {
|
||||
cl = GetClass(ch);
|
||||
}
|
||||
|
||||
PRBool allowBreak;
|
||||
if (cur > 0) {
|
||||
allowBreak = GetPair(lastClass, cl);
|
||||
if (state.UseConservativeBreaking())
|
||||
allowBreak = GetPairConservative(lastClass, cl);
|
||||
else
|
||||
allowBreak = GetPair(lastClass, cl);
|
||||
} else {
|
||||
allowBreak = PR_FALSE;
|
||||
}
|
||||
aBreakBefore[cur] = allowBreak;
|
||||
if (allowBreak)
|
||||
state.NotifyBreakBefore();
|
||||
lastClass = cl;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,11 +53,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>14</TD>
|
||||
<TD>3</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>17</TD>
|
||||
<TD>12</TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>13</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -78,24 +77,25 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD>13</TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD>11</TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>01_[a]<TH>
|
||||
<TD></TD>
|
||||
<TD>31</TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD>32</TD>
|
||||
<TD>6</TD>
|
||||
<TD>28</TD>
|
||||
<TD>3</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>71</TD>
|
||||
<TD BGCOLOR=white>64</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -112,17 +112,17 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD>14</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>16</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD>3</TD>
|
||||
<TD>4</TD>
|
||||
<TD>12</TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD>11</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
|
@ -131,9 +131,48 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>4</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>4</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>3</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>03_8<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -151,7 +190,6 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -161,44 +199,6 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>03_8<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>3</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>3</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -209,8 +209,8 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>4</TD>
|
||||
<TD>1</TD>
|
||||
<TD>5</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>5</TD>
|
||||
<TD></TD>
|
||||
|
@ -233,9 +233,9 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>4</TD>
|
||||
<TD>5</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -245,20 +245,20 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
</TR>
|
||||
<TR><TH>05_[b]<TH>
|
||||
<TD>33</TD>
|
||||
<TD>154</TD>
|
||||
<TD>153</TD>
|
||||
<TD></TD>
|
||||
<TD>53</TD>
|
||||
<TD>33</TD>
|
||||
<TD>2</TD>
|
||||
<TD>305</TD>
|
||||
<TD>13</TD>
|
||||
<TD BGCOLOR=white>560</TD>
|
||||
<TD>5</TD>
|
||||
<TD>12</TD>
|
||||
<TD BGCOLOR=white>238</TD>
|
||||
<TD>32</TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>154</TD>
|
||||
<TD>153</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -266,7 +266,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>33</TD>
|
||||
<TD>20</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -277,10 +277,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>305</TD>
|
||||
<TD>5</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>13</TD>
|
||||
<TD>12</TD>
|
||||
</TR>
|
||||
<TR><TH>06_15<TH>
|
||||
<TD></TD>
|
||||
|
@ -321,85 +321,46 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>07_16<TH>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>08_18<TH>
|
||||
<TD>10</TD>
|
||||
<TD>659</TD>
|
||||
<TD>4</TD>
|
||||
<TD>130</TD>
|
||||
<TD>56</TD>
|
||||
<TD>941</TD>
|
||||
<TD>2</TD>
|
||||
<TD BGCOLOR=white>1802</TD>
|
||||
<TD></TD>
|
||||
<TD>10</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>368</TD>
|
||||
<TD>1</TD>
|
||||
<TD>4</TD>
|
||||
<TD></TD>
|
||||
<TD>286</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>4</TD>
|
||||
<TR><TH>07_18<TH>
|
||||
<TD>19</TD>
|
||||
<TD>157</TD>
|
||||
<TD></TD>
|
||||
<TD>33</TD>
|
||||
<TD>54</TD>
|
||||
<TD>125</TD>
|
||||
<TD>3</TD>
|
||||
<TD>127</TD>
|
||||
<TD>3</TD>
|
||||
<TD BGCOLOR=white>391</TD>
|
||||
<TD></TD>
|
||||
<TD>19</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>67</TD>
|
||||
<TD>5</TD>
|
||||
<TD>3</TD>
|
||||
<TD>4</TD>
|
||||
<TD>6</TD>
|
||||
<TD></TD>
|
||||
<TD>81</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>3</TD>
|
||||
<TD>30</TD>
|
||||
<TD>5</TD>
|
||||
<TD>12</TD>
|
||||
<TD>10</TD>
|
||||
<TD>273</TD>
|
||||
<TD>646</TD>
|
||||
<TD>1</TD>
|
||||
<TD>1</TD>
|
||||
<TD>4</TD>
|
||||
<TD>2</TD>
|
||||
<TD>2</TD>
|
||||
<TD>2</TD>
|
||||
<TD>4</TD>
|
||||
<TD>36</TD>
|
||||
<TD>4</TD>
|
||||
<TD></TD>
|
||||
<TD>3</TD>
|
||||
<TD>23</TD>
|
||||
<TD>99</TD>
|
||||
<TD>1</TD>
|
||||
<TD>1</TD>
|
||||
<TD>1</TD>
|
||||
</TR>
|
||||
<TR><TH>09_COMPLEX<TH>
|
||||
<TR><TH>08_COMPLEX<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -438,6 +399,123 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>09_[c]<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>3</TD>
|
||||
<TD>4</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>7</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>3</TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>0A_[d]<TH>
|
||||
<TD>1</TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD>6</TD>
|
||||
<TD>21</TD>
|
||||
<TD>16</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>46</TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>6</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>3</TD>
|
||||
<TD>1</TD>
|
||||
<TD>1</TD>
|
||||
<TD>16</TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD>3</TD>
|
||||
<TD>7</TD>
|
||||
<TD>4</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>0B_[e]<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD>1</TD>
|
||||
<TD>3</TD>
|
||||
<TD BGCOLOR=white>5</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>3</TD>
|
||||
</TR>
|
||||
<TR><TH>X<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -487,74 +565,26 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD BGCOLOR=red>04_9</TD>
|
||||
<TD BGCOLOR=red>05_[b]</TD>
|
||||
<TD BGCOLOR=red>06_15</TD>
|
||||
<TD BGCOLOR=red>07_16</TD>
|
||||
<TD BGCOLOR=red>08_18</TD>
|
||||
<TD BGCOLOR=red>09_COMPLEX</TD>
|
||||
<TD BGCOLOR=red>07_18</TD>
|
||||
<TD BGCOLOR=red>08_COMPLEX</TD>
|
||||
<TD BGCOLOR=red>09_[c]</TD>
|
||||
<TD BGCOLOR=red>0A_[d]</TD>
|
||||
<TD BGCOLOR=red>0B_[e]</TD>
|
||||
<TD BGCOLOR=red>X</TD>
|
||||
</TR>
|
||||
<TR><TH>00<TH>
|
||||
<TD>6</TD>
|
||||
<TD>14</TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>33</TD>
|
||||
<TD>10</TD>
|
||||
<TD>126</TD>
|
||||
<TD></TD>
|
||||
<TD>156</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>01<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>128</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>02<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>89</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>03<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>76</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>04<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>226</TD>
|
||||
<TD></TD>
|
||||
<TD>7</TD>
|
||||
<TD>44</TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>0E<TH>
|
||||
|
@ -565,22 +595,26 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>20</TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>20<TH>
|
||||
<TD>2</TD>
|
||||
<TD>11</TD>
|
||||
<TD>4</TD>
|
||||
<TD></TD>
|
||||
<TD>5</TD>
|
||||
<TD>1</TD>
|
||||
<TD>12</TD>
|
||||
<TD></TD>
|
||||
<TD>4</TD>
|
||||
<TD>13</TD>
|
||||
<TD>101</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>86</TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
<TD>3</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>21<TH>
|
||||
|
@ -591,88 +625,12 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD>32</TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD>162</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>22<TH>
|
||||
<TD>163</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>242</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>23<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>24<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>139</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>25<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>230</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>26<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>106</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>27<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>160</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>30<TH>
|
||||
<TD>10</TD>
|
||||
|
@ -686,43 +644,6 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>32<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>132</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>33<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>188</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>4E<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>256</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
|
|
|
@ -396,11 +396,11 @@ printf "[%s || %s]\n", $r, $def;
|
|||
}
|
||||
print HEADER "};\n\n";
|
||||
}
|
||||
printarray("00", "8");
|
||||
printarray("20", "8");
|
||||
printarray("21", "8");
|
||||
printarray("00", "7");
|
||||
printarray("20", "7");
|
||||
printarray("21", "7");
|
||||
printarray("30", "5");
|
||||
printarray("0E", "9");
|
||||
printarray("0E", "8");
|
||||
|
||||
#print %rangecount;
|
||||
|
||||
|
|
|
@ -1,7 +1,85 @@
|
|||
0028;;1
|
||||
002F;;2
|
||||
005B;;1
|
||||
007B;;1
|
||||
0000;001f;17
|
||||
0020;;17
|
||||
0024;;24
|
||||
0027;;18
|
||||
0028;;22
|
||||
002D;;18
|
||||
002F;;18
|
||||
0021;002F;23
|
||||
0030;0039;15
|
||||
003C;;22
|
||||
003F;;4
|
||||
003A;003F;23
|
||||
0040;;18
|
||||
0041;005A;18
|
||||
005B;;22
|
||||
005E;;18
|
||||
005F;;18
|
||||
005B;005F;23
|
||||
0060;;18
|
||||
0061;007A;18
|
||||
007B;;22
|
||||
007B;007E;23
|
||||
00A0;;24
|
||||
00A3;;22
|
||||
00A5;;22
|
||||
00A9;;18
|
||||
00AA;;18
|
||||
00AC;;22
|
||||
00AE;;18
|
||||
00AF;;18
|
||||
00A1;00BF;23
|
||||
00B0;;18
|
||||
00F7;;23
|
||||
00C0;00FF;18
|
||||
0E3F;;1
|
||||
0E2F;;4
|
||||
0E46;;4
|
||||
0E5A;0E5B;4
|
||||
0E50;0E59;15
|
||||
0E4F;;18
|
||||
0EAF;;4
|
||||
0EC6;;4
|
||||
0ED0;0ED9;15
|
||||
2007;;24
|
||||
2000;200B;17
|
||||
200C;200F;18
|
||||
2010;;2
|
||||
2011;;24
|
||||
2012;2013;2
|
||||
2014;;7
|
||||
2015;;18
|
||||
2016;2017;18
|
||||
2018;201F;18
|
||||
2020;2023;18
|
||||
2024;2026;7
|
||||
2027;;23
|
||||
2028;202E;18
|
||||
202F;;24
|
||||
2030;2034;9
|
||||
2035;2038;18
|
||||
2039;;1
|
||||
203A;;2
|
||||
203B;;12
|
||||
203C;203D;3
|
||||
203E;;23
|
||||
203F;2043;18
|
||||
2044;;3
|
||||
2045;;1
|
||||
2046;;2
|
||||
2047;2049;3
|
||||
204A;2063;18
|
||||
206A;206F;18
|
||||
2070;2071;18
|
||||
2074;208E;18
|
||||
2090;2094;18
|
||||
2116;;8
|
||||
2160;217F;12
|
||||
2190;21EA;a12
|
||||
2126;;18
|
||||
2100;2138;18
|
||||
2153;2182;18
|
||||
2190;21EA;18
|
||||
3008;;1
|
||||
300A;;1
|
||||
300C;;1
|
||||
|
@ -12,10 +90,6 @@
|
|||
3018;;1
|
||||
301A;;1
|
||||
301D;;1
|
||||
0029;;2
|
||||
002C;;2
|
||||
005D;;2
|
||||
007D;;2
|
||||
3001;;2
|
||||
3009;;2
|
||||
300B;;2
|
||||
|
@ -28,8 +102,6 @@
|
|||
301B;;2
|
||||
301E;;2
|
||||
301F;;2
|
||||
203C;;3
|
||||
2044;;3
|
||||
301C;;3
|
||||
3041;;3
|
||||
3043;;3
|
||||
|
@ -58,65 +130,11 @@
|
|||
30FC;;3
|
||||
30FD;;3
|
||||
30FE;;3
|
||||
0021;;4
|
||||
003F;;4
|
||||
003A;;18
|
||||
003B;;5
|
||||
30FB;;5
|
||||
002E;;6
|
||||
3002;;6
|
||||
2014;;7
|
||||
2024;;2
|
||||
2025;;2
|
||||
2026;;2
|
||||
0024;;1
|
||||
005C;;1
|
||||
00A3;;8
|
||||
00A5;;8
|
||||
2116;;8
|
||||
0025;;2
|
||||
00A2;;9
|
||||
00B0;;2
|
||||
2030;;9
|
||||
2031;;9
|
||||
2032;;9
|
||||
2033;;9
|
||||
3000;;10
|
||||
3042;3094;11
|
||||
3099;309E;3
|
||||
002B;;18
|
||||
002D;;2
|
||||
003C;;1
|
||||
003D;;2
|
||||
003E;;2
|
||||
00A7;;18
|
||||
00A9;;18
|
||||
00AE;;18
|
||||
00B1;;18
|
||||
00B6;;18
|
||||
00D7;;18
|
||||
00F7;;18
|
||||
203B;;12
|
||||
2160;217F;12
|
||||
2190;21EA;a12
|
||||
2460;24EA;a12
|
||||
2500;257F;a12
|
||||
2580;2595;a12
|
||||
25A0;25EF;a12
|
||||
2600;2613;a12
|
||||
261A;266F;a12
|
||||
2701;2704;a12
|
||||
2706;2709;a12
|
||||
270C;2727;a12
|
||||
2729;274B;a12
|
||||
274D;;a12
|
||||
274F;2752;a12
|
||||
2756;;a12
|
||||
2758;275E;a12
|
||||
2761;2767;a12
|
||||
2776;2794;a12
|
||||
2798;27AF;a12
|
||||
27B1;27BE;a12
|
||||
3003;;12
|
||||
3004;;12
|
||||
3006;;12
|
||||
|
@ -126,76 +144,3 @@
|
|||
3020;;12
|
||||
3036;;12
|
||||
30A2;30FA;12
|
||||
3220;3243;12
|
||||
3280;32B0;12
|
||||
32D0;32FE;12
|
||||
3300;3357;12
|
||||
3371;3376;12
|
||||
3380;33DD;12
|
||||
4E00;9F45;12
|
||||
0030;0039;15
|
||||
2126;;16
|
||||
0020;;17
|
||||
0000;001f;17
|
||||
0021;007E;18
|
||||
00A1;00FF;18
|
||||
0100;017F;18
|
||||
0250;02A8;18
|
||||
0374;0375;18
|
||||
037A;;18
|
||||
037E;;18
|
||||
0384;038A;18
|
||||
038C;;18
|
||||
038E;03A1;18
|
||||
03A3;03CE;18
|
||||
0401;040C;18
|
||||
040E;044F;18
|
||||
0451;045C;18
|
||||
045E;047F;18
|
||||
0480;0486;18
|
||||
0480;0486;18
|
||||
0490;04C4;18
|
||||
04C7;04C8;18
|
||||
04CB;04CC;18
|
||||
04D0;04EB;18
|
||||
04EE;04F5;18
|
||||
04F8;04F9;18
|
||||
2000;200B;17
|
||||
200C;202E;18
|
||||
2030;2046;18
|
||||
2070;;18
|
||||
2074;208E;18
|
||||
20A0;20AA;18
|
||||
2100;2138;18
|
||||
2153;2182;18
|
||||
2190;21EA;18
|
||||
2200;227F;18
|
||||
2280;22F1;18
|
||||
2312;;18
|
||||
2460;24EA;18
|
||||
2500;257F;18
|
||||
2580;2595;18
|
||||
25A0;25EF;18
|
||||
2600;2613;18
|
||||
261A;266F;18
|
||||
2701;2704;18
|
||||
2706;2709;18
|
||||
270C;2727;18
|
||||
2729;274B;18
|
||||
274D;;18
|
||||
274F;2752;18
|
||||
2756;;18
|
||||
2758;275E;18
|
||||
2761;2767;18
|
||||
2776;2794;18
|
||||
2798;27AF;18
|
||||
27B1;27BE;18
|
||||
0E3F;;1
|
||||
0E2F;;4
|
||||
0E46;;4
|
||||
0E5A;0E5B;4
|
||||
0E50;0E59;15
|
||||
0E4F;;18
|
||||
0EAF;;4
|
||||
0EC6;;4
|
||||
0ED0;0ED9;15
|
||||
|
|
|
@ -13,9 +13,12 @@
|
|||
13;X
|
||||
14;X
|
||||
15;06_15
|
||||
16;07_16
|
||||
16;X
|
||||
17;05_[b]
|
||||
18;08_18
|
||||
18;07_18
|
||||
19;X
|
||||
20;X
|
||||
21;09_COMPLEX
|
||||
21;08_COMPLEX
|
||||
22;09_[c]
|
||||
23;0A_[d]
|
||||
24;0B_[e]
|
||||
|
|
|
@ -21,99 +21,102 @@ td {
|
|||
</head>
|
||||
<body>
|
||||
<p>This is a specification table for line breaking.</p>
|
||||
<p>The value 'A' means the line breakable After the character, and 'B' means Before. 'BA' means Before and After.</p>
|
||||
<p>(C) which is the tail of the browser name means Character. (N) means Numeric.
|
||||
This means that they are around the character. E.g., "a$a" is a testcase for (C), "0$0" is a testcase for (N).</p>
|
||||
<p>In the IE7 and Opera9 columns, 'A' means that the line is breakable After the character, and 'B' means Before. 'BA' means Before and After.</p>
|
||||
<p>The suffix (C) on the IE7 and Opera9 column names means Character; (N) means Numeric.
|
||||
The suffix indicates which kind of characters surrounded the tested character during testing. E.g., "a$a" is a testcase for (C), and "0$0" is a testcase for (N).</p>
|
||||
<p>Gecko does not break lines at these characters in most Western-language contexts. But for file paths, URLs and very long words that are connected by hyphens,
|
||||
some characters might be breakable. These are marked 'breakable' in the table. They are not always breakable, though;
|
||||
whether they are <em>depends on the context</em> in the word.</p>
|
||||
<table border="1">
|
||||
<thead>
|
||||
<tr><th colspan="2">character</th><th>Gecko(C)</th><th>Gecko(N)</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
|
||||
<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
|
||||
</thead>
|
||||
<tfoot>
|
||||
<tr><th colspan="2">character</th><th>Gecko(C)</th><th>Gecko(N)</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
|
||||
<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
|
||||
</tfoot>
|
||||
<tbody>
|
||||
<tr><th>0x21</th><th>!</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x22</th><th>"</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x23</th><th>#</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x24</th><th>$</th><td>B</td><td>B</td><td></td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x25</th><th>%</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x26</th><th>&</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x27</th><th>'</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x28</th><th>(</th><td>B</td><td>B</td><td>B</td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x29</th><th>)</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x2A</th><th>*</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x2B</th><th>+</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x2C</th><th>,</th><td>A</td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x2D</th><th>-</th><td>A</td><td></td><td>BA</td><td>BA</td><td>A</td><td>A</td></tr>
|
||||
<tr><th>0x2E</th><th>.</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x2F</th><th>/</th><td>A</td><td></td><td></td><td></td><td>A</td><td>A</td></tr>
|
||||
<tr><th>0x21</th><th>!</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x22</th><th>"</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x23</th><th>#</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x24</th><th>$</th><td></td><td></td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x25</th><th>%</th><td>breakable</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x26</th><th>&amp;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x27</th><th>'</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x28</th><th>(</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x29</th><th>)</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x2A</th><th>*</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x2B</th><th>+</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x2C</th><th>,</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x2D</th><th>-</th><td>breakable</td><td>BA</td><td>BA</td><td>A</td><td>A</td></tr>
|
||||
<tr><th>0x2E</th><th>.</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x2F</th><th>/</th><td>breakable</td><td></td><td></td><td>A</td><td>A</td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0x3A</th><th>:</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3B</th><th>;</th><td>A</td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3C</th><th><</th><td>B</td><td>B</td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3D</th><th>=</th><td>A</td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3E</th><th>></th><td>A</td><td>A</td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3F</th><th>?</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x3A</th><th>:</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3B</th><th>;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3C</th><th>&lt;</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3D</th><th>=</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3E</th><th>></th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x3F</th><th>?</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0x40</th><th>@</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x40</th><th>@</th><td></td><td></td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0x5B</th><th>[</th><td>B</td><td>B</td><td>B</td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x5C</th><th>\</th><td>B</td><td>B</td><td></td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x5D</th><th>]</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x5E</th><th>^</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x5F</th><th>_</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x5B</th><th>[</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x5C</th><th>\</th><td>breakable</td><td></td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x5D</th><th>]</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x5E</th><th>^</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x5F</th><th>_</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0x60</th><th>`</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x60</th><th>`</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0x7B</th><th>{</th><td>B</td><td>B</td><td>B</td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x7C</th><th>|</th><td></td><td></td><td></td><td></td><td>A</td><td>A</td></tr>
|
||||
<tr><th>0x7D</th><th>}</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x7E</th><th>~</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0x7B</th><th>{</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0x7C</th><th>|</th><td></td><td></td><td></td><td>A</td><td>A</td></tr>
|
||||
<tr><th>0x7D</th><th>}</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0x7E</th><th>~</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0xA1</th><th>¡</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA2</th><th>¢</th><td>BA</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0xA3</th><th>£</th><td>BA</td><td>B</td><td></td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0xA4</th><th>¤</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA5</th><th>¥</th><td>BA</td><td>B</td><td></td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0xA6</th><th>¦</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA7</th><th>§</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA8</th><th>¨</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA9</th><th>©</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAA</th><th>ª</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAB</th><th>«</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAC</th><th>¬</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAE</th><th>®</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAF</th><th>¯</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA1</th><th>¡</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA2</th><th>¢</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0xA3</th><th>£</th><td></td><td></td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0xA4</th><th>¤</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA5</th><th>¥</th><td></td><td></td><td>B</td><td></td><td></td></tr>
|
||||
<tr><th>0xA6</th><th>¦</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA7</th><th>§</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA8</th><th>¨</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xA9</th><th>©</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAA</th><th>ª</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAB</th><th>«</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAC</th><th>¬</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAE</th><th>®</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xAF</th><th>¯</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0xB0</th><th>°</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0xB1</th><th>±</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB2</th><th>²</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB3</th><th>³</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB4</th><th>´</th><td></td><td></td><td></td><td></td><td>B</td><td>B</td></tr>
|
||||
<tr><th>0xB5</th><th>µ</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB6</th><th>¶</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB7</th><th>·</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB8</th><th>¸</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB9</th><th>¹</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBA</th><th>º</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBB</th><th>»</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBC</th><th>¼</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBD</th><th>½</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBE</th><th>¾</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBF</th><th>¿</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB0</th><th>°</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
|
||||
<tr><th>0xB1</th><th>±</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB2</th><th>²</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB3</th><th>³</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB4</th><th>´</th><td></td><td></td><td></td><td>B</td><td>B</td></tr>
|
||||
<tr><th>0xB5</th><th>µ</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB6</th><th>¶</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB7</th><th>·</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB8</th><th>¸</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xB9</th><th>¹</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBA</th><th>º</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBB</th><th>»</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBC</th><th>¼</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBD</th><th>½</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBE</th><th>¾</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xBF</th><th>¿</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0xD7</th><th>×</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xD7</th><th>×</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr><th>0xF7</th><th>÷</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
<tr><th>0xF7</th><th>÷</th><td></td><td></td><td></td><td></td><td></td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</body>
|
||||
|
|
Загрузка…
Ссылка в новой задаче