Bug 389056 Don't break line between periods and quote/Bug 388096 add line-breaking opportunity after U+058A ARMENIAN HYPHEN, U+2010 HYPHEN, U+2012 FIGURE DASH and U+2013 EN DASH. And U+2014 EM DASH should be breakable before and after except that between the same characters/Bug 389595 Don't break around '(' and ')'/Bug 390920 Don't break around   r+sr=roc, b1.9=dbaron

This commit is contained in:
masayuki%d-toybox.com 2007-09-19 06:34:25 +00:00
Родитель 86af6ecafb
Коммит 2725276ed0
9 изменённых файлов: 1102 добавлений и 880 удалений

Просмотреть файл

@ -40,6 +40,7 @@
#include "nsString.h"
#include "nsTArray.h"
#include "nsILineBreaker.h"
class nsIAtom;
@ -81,10 +82,7 @@ public:
nsLineBreaker();
~nsLineBreaker();
static inline PRBool IsSpace(PRUnichar u)
{
return u == 0x0020 || u == 0x200b/*ZWSP*/ || u == '\n' || u == '\t';
}
static inline PRBool IsSpace(PRUnichar u) { return NS_IsSpace(u); }
static inline PRBool IsComplexASCIIChar(PRUnichar u)
{

Просмотреть файл

@ -72,4 +72,18 @@ public:
NS_DEFINE_STATIC_IID_ACCESSOR(nsILineBreaker, NS_ILINEBREAKER_IID)
// Returns non-zero when |u| is one of the space characters listed below.
// U+2007 (the one code point skipped inside U+2000..U+200B) and
// U+000A LINE FEED are not matched.
static inline PRBool
NS_IsSpace(PRUnichar u)
{
  // SPACE, CHARACTER TABULATION, CARRIAGE RETURN
  const PRBool isBasicSpace =
    (u == 0x0020) || (u == 0x0009) || (u == 0x000D);

  // U+2000..U+2006: EN QUAD, EM QUAD, EN SPACE, EM SPACE,
  //                 THREE-PER-EM SPACE, FOUR-PER-EM SPACE, SIX-PER-EM SPACE
  // U+2008..U+200B: PUNCTUATION SPACE, THIN SPACE, HAIR SPACE,
  //                 ZERO WIDTH SPACE
  const PRBool isGeneralPunctuationSpace =
    (0x2000 <= u) && (u <= 0x200B) && (u != 0x2007);

  // IDEOGRAPHIC SPACE
  const PRBool isIdeographicSpace = (u == 0x3000);

  return isBasicSpace || isGeneralPunctuationSpace || isIdeographicSpace;
}
#endif /* nsILineBreaker_h__ */

Просмотреть файл

@ -43,104 +43,104 @@ static const PRUint32 gLBClass00[32] = {
0x55555555, // U+0008 - U+000F
0x55555555, // U+0010 - U+0017
0x55555555, // U+0018 - U+001F
0x88108815, // U+0020 - U+0027
0x11118810, // U+0028 - U+002F
0x7AABAAA5, // U+0020 - U+0027
0x7A7AAAA9, // U+0028 - U+002F
0x66666666, // U+0030 - U+0037
0x11101866, // U+0038 - U+003F
0x88888888, // U+0040 - U+0047
0x88888888, // U+0048 - U+004F
0x88888888, // U+0050 - U+0057
0x88100888, // U+0058 - U+005F
0x88888888, // U+0060 - U+0067
0x88888888, // U+0068 - U+006F
0x88888888, // U+0070 - U+0077
0x88180888, // U+0078 - U+007F
0x88888888, // U+0080 - U+0087
0x88888888, // U+0088 - U+008F
0x88888888, // U+0090 - U+0097
0x88888888, // U+0098 - U+009F
0x88383488, // U+00A0 - U+00A7
0x88888888, // U+00A8 - U+00AF
0x88888881, // U+00B0 - U+00B7
0x88888888, // U+00B8 - U+00BF
0x88888888, // U+00C0 - U+00C7
0x88888888, // U+00C8 - U+00CF
0x88888888, // U+00D0 - U+00D7
0x88888888, // U+00D8 - U+00DF
0x88888888, // U+00E0 - U+00E7
0x88888888, // U+00E8 - U+00EF
0x88888888, // U+00F0 - U+00F7
0x88888888, // U+00F8 - U+00FF
0x1AA9AA66, // U+0038 - U+003F
0x77777777, // U+0040 - U+0047
0x77777777, // U+0048 - U+004F
0x77777777, // U+0050 - U+0057
0x77AA9777, // U+0058 - U+005F
0x77777777, // U+0060 - U+0067
0x77777777, // U+0068 - U+006F
0x77777777, // U+0070 - U+0077
0x7AAA9777, // U+0078 - U+007F
0x77777777, // U+0080 - U+0087
0x77777777, // U+0088 - U+008F
0x77777777, // U+0090 - U+0097
0x77777777, // U+0098 - U+009F
0xAA9A9AAB, // U+00A0 - U+00A7
0x77A9A77A, // U+00A8 - U+00AF
0xAAAAAAAA, // U+00B0 - U+00B7
0xAAAAAAAA, // U+00B8 - U+00BF
0x77777777, // U+00C0 - U+00C7
0x77777777, // U+00C8 - U+00CF
0x77777777, // U+00D0 - U+00D7
0x77777777, // U+00D8 - U+00DF
0x77777777, // U+00E0 - U+00E7
0x77777777, // U+00E8 - U+00EF
0xA7777777, // U+00F0 - U+00F7
0x77777777, // U+00F8 - U+00FF
};
static const PRUint32 gLBClass20[32] = {
0x55555555, // U+2000 - U+2007
0x88885555, // U+2008 - U+200F
0x88828888, // U+2010 - U+2017
0x88888888, // U+2018 - U+201F
0x81118888, // U+2020 - U+2027
0x88888888, // U+2028 - U+202F
0x88884444, // U+2030 - U+2037
0x88815888, // U+2038 - U+203F
0x88818888, // U+2040 - U+2047
0x88888888, // U+2048 - U+204F
0x88888888, // U+2050 - U+2057
0x88888888, // U+2058 - U+205F
0x88888888, // U+2060 - U+2067
0x88888888, // U+2068 - U+206F
0x88888888, // U+2070 - U+2077
0x88888888, // U+2078 - U+207F
0x88888888, // U+2080 - U+2087
0x88888888, // U+2088 - U+208F
0x88888888, // U+2090 - U+2097
0x88888888, // U+2098 - U+209F
0x88888888, // U+20A0 - U+20A7
0x88888888, // U+20A8 - U+20AF
0x88888888, // U+20B0 - U+20B7
0x88888888, // U+20B8 - U+20BF
0x88888888, // U+20C0 - U+20C7
0x88888888, // U+20C8 - U+20CF
0x88888888, // U+20D0 - U+20D7
0x88888888, // U+20D8 - U+20DF
0x88888888, // U+20E0 - U+20E7
0x88888888, // U+20E8 - U+20EF
0x88888888, // U+20F0 - U+20F7
0x88888888, // U+20F8 - U+20FF
0xB5555555, // U+2000 - U+2007
0x77775555, // U+2008 - U+200F
0x777211B1, // U+2010 - U+2017
0x77777777, // U+2018 - U+201F
0xA2227777, // U+2020 - U+2027
0xB7777777, // U+2028 - U+202F
0x77744444, // U+2030 - U+2037
0x7A115107, // U+2038 - U+203F
0x11017777, // U+2040 - U+2047
0x77777711, // U+2048 - U+204F
0x77777777, // U+2050 - U+2057
0x77777777, // U+2058 - U+205F
0x77777777, // U+2060 - U+2067
0x77777777, // U+2068 - U+206F
0x77777777, // U+2070 - U+2077
0x77777777, // U+2078 - U+207F
0x77777777, // U+2080 - U+2087
0x77777777, // U+2088 - U+208F
0x77777777, // U+2090 - U+2097
0x77777777, // U+2098 - U+209F
0x77777777, // U+20A0 - U+20A7
0x77777777, // U+20A8 - U+20AF
0x77777777, // U+20B0 - U+20B7
0x77777777, // U+20B8 - U+20BF
0x77777777, // U+20C0 - U+20C7
0x77777777, // U+20C8 - U+20CF
0x77777777, // U+20D0 - U+20D7
0x77777777, // U+20D8 - U+20DF
0x77777777, // U+20E0 - U+20E7
0x77777777, // U+20E8 - U+20EF
0x77777777, // U+20F0 - U+20F7
0x77777777, // U+20F8 - U+20FF
};
static const PRUint32 gLBClass21[32] = {
0x88888888, // U+2100 - U+2107
0x88888888, // U+2108 - U+210F
0x83888888, // U+2110 - U+2117
0x88888888, // U+2118 - U+211F
0x87888888, // U+2120 - U+2127
0x88888888, // U+2128 - U+212F
0x88888888, // U+2130 - U+2137
0x88888888, // U+2138 - U+213F
0x88888888, // U+2140 - U+2147
0x88888888, // U+2148 - U+214F
0x88888888, // U+2150 - U+2157
0x88888888, // U+2158 - U+215F
0x77777777, // U+2100 - U+2107
0x77777777, // U+2108 - U+210F
0x73777777, // U+2110 - U+2117
0x77777777, // U+2118 - U+211F
0x77777777, // U+2120 - U+2127
0x77777777, // U+2128 - U+212F
0x77777777, // U+2130 - U+2137
0x77777777, // U+2138 - U+213F
0x77777777, // U+2140 - U+2147
0x77777777, // U+2148 - U+214F
0x77777777, // U+2150 - U+2157
0x77777777, // U+2158 - U+215F
0x55555555, // U+2160 - U+2167
0x55555555, // U+2168 - U+216F
0x55555555, // U+2170 - U+2177
0x55555555, // U+2178 - U+217F
0x88888888, // U+2180 - U+2187
0x88888888, // U+2188 - U+218F
0x88888888, // U+2190 - U+2197
0x88888888, // U+2198 - U+219F
0x88888888, // U+21A0 - U+21A7
0x88888888, // U+21A8 - U+21AF
0x88888888, // U+21B0 - U+21B7
0x88888888, // U+21B8 - U+21BF
0x88888888, // U+21C0 - U+21C7
0x88888888, // U+21C8 - U+21CF
0x88888888, // U+21D0 - U+21D7
0x88888888, // U+21D8 - U+21DF
0x88888888, // U+21E0 - U+21E7
0x88888888, // U+21E8 - U+21EF
0x88888888, // U+21F0 - U+21F7
0x88888888, // U+21F8 - U+21FF
0x77777777, // U+2180 - U+2187
0x77777777, // U+2188 - U+218F
0x77777777, // U+2190 - U+2197
0x77777777, // U+2198 - U+219F
0x77777777, // U+21A0 - U+21A7
0x77777777, // U+21A8 - U+21AF
0x77777777, // U+21B0 - U+21B7
0x77777777, // U+21B8 - U+21BF
0x77777777, // U+21C0 - U+21C7
0x77777777, // U+21C8 - U+21CF
0x77777777, // U+21D0 - U+21D7
0x77777777, // U+21D8 - U+21DF
0x77777777, // U+21E0 - U+21E7
0x77777777, // U+21E8 - U+21EF
0x77777777, // U+21F0 - U+21F7
0x77777777, // U+21F8 - U+21FF
};
static const PRUint32 gLBClass30[32] = {
@ -179,37 +179,37 @@ static const PRUint32 gLBClass30[32] = {
};
static const PRUint32 gLBClass0E[32] = {
0x99999999, // U+0E00 - U+0E07
0x99999999, // U+0E08 - U+0E0F
0x99999999, // U+0E10 - U+0E17
0x99999999, // U+0E18 - U+0E1F
0x99999999, // U+0E20 - U+0E27
0x19999999, // U+0E28 - U+0E2F
0x99999999, // U+0E30 - U+0E37
0x09999999, // U+0E38 - U+0E3F
0x91999999, // U+0E40 - U+0E47
0x89999999, // U+0E48 - U+0E4F
0x88888888, // U+0E00 - U+0E07
0x88888888, // U+0E08 - U+0E0F
0x88888888, // U+0E10 - U+0E17
0x88888888, // U+0E18 - U+0E1F
0x88888888, // U+0E20 - U+0E27
0x18888888, // U+0E28 - U+0E2F
0x88888888, // U+0E30 - U+0E37
0x08888888, // U+0E38 - U+0E3F
0x81888888, // U+0E40 - U+0E47
0x78888888, // U+0E48 - U+0E4F
0x66666666, // U+0E50 - U+0E57
0x99991166, // U+0E58 - U+0E5F
0x99999999, // U+0E60 - U+0E67
0x99999999, // U+0E68 - U+0E6F
0x99999999, // U+0E70 - U+0E77
0x99999999, // U+0E78 - U+0E7F
0x99999999, // U+0E80 - U+0E87
0x99999999, // U+0E88 - U+0E8F
0x99999999, // U+0E90 - U+0E97
0x99999999, // U+0E98 - U+0E9F
0x99999999, // U+0EA0 - U+0EA7
0x19999999, // U+0EA8 - U+0EAF
0x99999999, // U+0EB0 - U+0EB7
0x99999999, // U+0EB8 - U+0EBF
0x91999999, // U+0EC0 - U+0EC7
0x99999999, // U+0EC8 - U+0ECF
0x88881166, // U+0E58 - U+0E5F
0x88888888, // U+0E60 - U+0E67
0x88888888, // U+0E68 - U+0E6F
0x88888888, // U+0E70 - U+0E77
0x88888888, // U+0E78 - U+0E7F
0x88888888, // U+0E80 - U+0E87
0x88888888, // U+0E88 - U+0E8F
0x88888888, // U+0E90 - U+0E97
0x88888888, // U+0E98 - U+0E9F
0x88888888, // U+0EA0 - U+0EA7
0x18888888, // U+0EA8 - U+0EAF
0x88888888, // U+0EB0 - U+0EB7
0x88888888, // U+0EB8 - U+0EBF
0x81888888, // U+0EC0 - U+0EC7
0x88888888, // U+0EC8 - U+0ECF
0x66666666, // U+0ED0 - U+0ED7
0x99999966, // U+0ED8 - U+0EDF
0x99999999, // U+0EE0 - U+0EE7
0x99999999, // U+0EE8 - U+0EEF
0x99999999, // U+0EF0 - U+0EF7
0x99999999, // U+0EF8 - U+0EFF
0x88888866, // U+0ED8 - U+0EDF
0x88888888, // U+0EE0 - U+0EE7
0x88888888, // U+0EE8 - U+0EEF
0x88888888, // U+0EF0 - U+0EF7
0x88888888, // U+0EF8 - U+0EFF
};

Просмотреть файл

@ -66,78 +66,97 @@
4 X X X X X X
5 X X X X X X
6 X X X X X X
7 X X X X X X X
8 X X X X X X E
7 X X X X X X X
8 X X X X X X E
9 X X X X X X
10 X X X X X X
11 X X X X X X
12 X X X X X X
12 X X X X X X
13 X X X X X X X
14 X X X X X X X
15 X X X X X X X X X
15 X X X X X X X X X
16 X X X X X X X X
17 X X X X X E
18 X X X X X X X X X
17 X X X X X E
18 X X X X X X X X X
19 X E E E E E X X X X X X X X X X X X E X E E
20 X X X X X E
* Same Char
# Other Char
X Cannot Break
The classes mean:
1: Open parenthesis
2: Close parenthesis
3: Prohibit a line break before
4: Punctuation for sentence end (except Full stop, e.g., "!" and "?")
5: Middle dot (e.g., U+30FB KATAKANA MIDDLE DOT)
6: Full stop
7: Non-breakable between same characters
8: Prefix (e.g., "$", "NO.")
9: Postfix (e.g., "%")
10: Ideographic space
11: Hiragana
12: Japanese characters (except class 11)
13: Subscript
14: Ruby
15: Numeric
16: Alphabet
17: Space for Western language
18: Western characters (except class 17)
19: Split line note (Warichu) begin quote
20: Split line note (Warichu) end quote
2. Simplified by remove the class which we do not care
However, since we do not care about class 13(Subscript), 14(Ruby),
19(split line note begin quote), and 20(split line note end quote)
we can simplify this par table into the following
However, since we do not care about class 13(Subscript), 14(Ruby),
16 (Alphabet), 19(split line note begin quote), and 20(split line note end
quote) we can simplify this par table into the following
Class of
Leading Class of Trailing Char Class
Char
Char
1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18
1 X X X X X X X X X X X X X X X X
2 X X X X X
3 X X X X X
1 2 3 4 5 6 7 8 9 10 11 12 15 17 18
1 X X X X X X X X X X X X X X X
2 X X X X X
3 X X X X X
4 X X X X X
5 X X X X X
6 X X X X X
7 X X X X X X
8 X X X X X X
9 X X X X X
10 X X X X X
11 X X X X X
12 X X X X X
15 X X X X X X X X
16 X X X X X X X
17 X X X X X
18 X X X X X X X X
5 X X X X X
6 X X X X X
7 X X X X X X
8 X X X X X X
9 X X X X X
10 X X X X X
11 X X X X X
12 X X X X X
15 X X X X X X X X
17 X X X X X
18 X X X X X X X
3. Simplified by merged classes
After the 2 simplification, the pair table have some duplication
After the 2 simplification, the pair table have some duplication
a. class 2, 3, 4, 5, 6, are the same- we can merged them
b. class 10, 11, 12, 17 are the same- we can merged them
Class of
Leading Class of Trailing Char Class
Char
Char
1 [a] 7 8 9 [b]15 16 18
1 X X X X X X X X X
[a] X
7 X X
8 X X
9 X
[b] X
15 X X X X
16 X X X
18 X X X X
1 [a] 7 8 9 [b]15 18
1 X X X X X X X X
[a] X
7 X X
8 X X
9 X
[b] X
15 X X X X
18 X X X
4. We add COMPLEX characters and make them breakable w/ all other classes
@ -145,41 +164,173 @@
Class of
Leading Class of Trailing Char Class
Char
Char
1 [a] 7 8 9 [b]15 18 COMPLEX
1 X X X X X X X X X
[a] X
7 X X
8 X X
9 X
[b] X
15 X X X X
18 X X X
COMPLEX X T
1 [a] 7 8 9 [b]15 16 18 COMPLEX
1 X X X X X X X X X X
[a] X
7 X X
8 X X
9 X
[b] X
15 X X X X
16 X X X
18 X X X X
COMPLEX X T
T : need special handling
5. Now we use one bit to encode weather it is breakable, and use 2 bytes
5. However, we need two special class for some punctuations/parentheses,
theirs breaking rules like character class (18), see bug 389056.
And also we need character like punctuation that is same behavior with 18,
but the characters are not letters of all languages. (e.g., '_')
[c]. Based on open parenthesis class (1), but it is not breakable after
character class (18) or numeric class (15).
[d]. Based on close parenthesis (or punctuation) class (2), but it is not
breakable before character class (18) or numeric class (15).
Class of
Leading Class of Trailing Char Class
Char
1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d]
1 X X X X X X X X X X X
[a] X X X
7 X X
8 X X
9 X
[b] X X
15 X X X X X X
18 X X X X X
COMPLEX X T
[c] X X X X X X X X X X X
[d] X X X X
6. And Unicode has "NON-BREAK" characters. Lines should not be broken around
them. But JIS X 4051 has no such class, therefore, we create [e].
Class of
Leading Class of Trailing Char Class
Char
1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d] [e]
1 X X X X X X X X X X X X
[a] X X X X
7 X X X
8 X X X
9 X X
[b] X X X
15 X X X X X X X
18 X X X X X X
COMPLEX X T X
[c] X X X X X X X X X X X X
[d] X X X X X
[e] X X X X X X X X X X X X
7. Now we use one bit to encode whether it is breakable, and use 2 bytes
for one row, then the bit table will look like:
18 <- 1
1 0000 0011 1111 1111 = 0x03FF
[a] 0000 0000 0000 0010 = 0x0002
7 0000 0000 0000 0110 = 0x0006
8 0000 0000 0100 0010 = 0x0042
9 0000 0000 0000 0010 = 0x0002
[b] 0000 0000 0000 0010 = 0x0002
15 0000 0001 0101 0010 = 0x0152
16 0000 0001 1000 0010 = 0x0182
18 0000 0001 1100 0010 = 0x01C2
COMPLEX 0000 0010 0000 0010 = 0x0202
5. Now we map the class to number
1 0000 1111 1111 1111 = 0x0FFF
[a] 0000 1110 0000 0010 = 0x0E02
7 0000 1000 0000 0110 = 0x0806
8 0000 1000 0100 0010 = 0x0842
9 0000 1000 0000 0010 = 0x0802
[b] 0000 1100 0000 0010 = 0x0C02
15 0000 1110 1101 0010 = 0x0ED2
18 0000 1110 1100 0010 = 0x0EC2
COMPLEX 0000 1001 0000 0010 = 0x0902
[c] 0000 1111 1111 1111 = 0x0FFF
[d] 0000 1100 1100 0010 = 0x0CC2
[e] 0000 1111 1111 1111 = 0x0FFF
*/
#define MAX_CLASSES 12
static const PRUint16 gPair[MAX_CLASSES] = {
0x0FFF,
0x0E02,
0x0806,
0x0842,
0x0802,
0x0C02,
0x0ED2,
0x0EC2,
0x0902,
0x0FFF,
0x0CC2,
0x0FFF
};
/*
8. And if the character is not far enough from word start, word end and
another break point, we should not break in non-CJK languages.
I.e., Don't break around 15, 18, [c] and [d], but don't change
that if they are related to [b].
Class of
Leading Class of Trailing Char Class
Char
1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d] [e]
1 X X X X X X X X X X X X
[a] X X X X X X
7 X X X X X X X
8 X X X X X X
9 X X X X X X
[b] X X X
15 X X X X X X X X X X X
18 X X X X X X X X X X X
COMPLEX X X X T X X X
[c] X X X X X X X X X X X X
[d] X X X X X X X X X X X
[e] X X X X X X X X X X X X
18 <- 1
1 0000 1111 1111 1111 = 0x0FFF
[a] 0000 1110 1100 0010 = 0x0EC2
7 0000 1110 1100 0110 = 0x0EC6
8 0000 1110 1100 0010 = 0x0EC2
9 0000 1110 1100 0010 = 0x0EC2
[b] 0000 1100 0000 0010 = 0x0C02
15 0000 1111 1101 1111 = 0x0FDF
18 0000 1111 1101 1111 = 0x0FDF
COMPLEX 0000 1111 1100 0010 = 0x0FC2
[c] 0000 1111 1111 1111 = 0x0FFF
[d] 0000 1111 1101 1111 = 0x0EDF
[e] 0000 1111 1111 1111 = 0x0FFF
*/
// Conservative pair table.  One row per leading class, in class-number order;
// bit N of a row is set when a break is prohibited before trailing class N.
//
// The [d] row is 0x0FDF, matching the bit pattern 0000 1111 1101 1111 given
// for [d] in the table comment above.  The hex value printed beside that
// pattern (0x0EDF) drops the COMPLEX bit, which would allow a break between
// [d] and a COMPLEX character even though rows 15, 18, [c] and [d] are meant
// to prohibit breaks against every class except [b].
static const PRUint16 gPairConservative[MAX_CLASSES] = {
  0x0FFF, // 1
  0x0EC2, // [a]
  0x0EC6, // 7
  0x0EC2, // 8
  0x0EC2, // 9
  0x0C02, // [b]
  0x0FDF, // 15
  0x0FDF, // 18
  0x0FC2, // COMPLEX
  0x0FFF, // [c]
  0x0FDF, // [d]
  0x0FFF  // [e]
};
/*
9. Now we map the class to number
0: 1
1: [a]- 2, 3, 4, 5, 6
2: 7
@ -187,27 +338,59 @@
4: 9
5: [b]- 10, 11, 12, 17
6: 15
7: 16
8: 18
9: COMPLEX
7: 18
8: COMPLEX
9: [c]
A: [d]
B: [e]
and they mean:
0: Open parenthesis
1: Punctuation that prohibits break before
2: Non-breakable between same classes
3: Prefix
4: Postfix
5: Breakable character (Spaces and Most Japanese characters)
6: Numeric
7: Characters
8: Need special handling characters (E.g., Thai)
9: Open parenthesis like Character (See bug 389056)
A: Close parenthesis (or punctuation) like Character (See bug 389056)
B: Non breakable (See bug 390920)
*/
#define MAX_CLASSES 10
#define CLASS_NONE PR_INT8_MAX
static const PRUint16 gPair[MAX_CLASSES] = {
0x03FF,
0x0002,
0x0006,
0x0042,
0x0002,
0x0002,
0x0152,
0x0182,
0x01C2,
0x0202
};
#define CLASS_OPEN 0x00
#define CLASS_CLOSE 0x01
#define CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS 0x02
#define CLASS_PREFIX 0x03
#define CLASS_POSTFFIX 0x04
#define CLASS_BREAKABLE 0x05
#define CLASS_NUMERIC 0x06
#define CLASS_CHARACTER 0x07
#define CLASS_COMPLEX 0x08
#define CLASS_OPEN_LIKE_CHARACTER 0x09
#define CLASS_CLOSE_LIKE_CHARACTER 0x0A
#define CLASS_NON_BREAKABLE 0x0B
#define U_NULL PRUnichar(0x0000)
#define U_SLASH PRUnichar('/')
#define U_SPACE PRUnichar(' ')
#define U_HYPHEN PRUnichar('-')
#define U_EQUAL PRUnichar('=')
#define U_PERCENT PRUnichar('%')
#define U_AMPERSAND PRUnichar('&')
#define U_BACKSLASH PRUnichar('\\')
#define NEED_CONTEXTUAL_ANALYSIS(c) (IS_HYPHEN(c) || \
(c) == U_SLASH || \
(c) == U_PERCENT || \
(c) == U_AMPERSAND || \
(c) == U_BACKSLASH)
#define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)
static inline int
GETCLASSFROMTABLE(const PRUint32* t, PRUint16 l)
@ -215,10 +398,6 @@ GETCLASSFROMTABLE(const PRUint32* t, PRUint16 l)
return ((((t)[(l>>3)]) >> ((l & 0x0007)<<2)) & 0x000f);
}
#define CLASS_COMPLEX 9
static inline int
IS_HALFWIDTH_IN_JISx4051_CLASS3(PRUnichar u)
{
@ -240,54 +419,48 @@ IS_COMPLEX(PRUnichar u)
return (0x0e01 <= (u) && (u) <= 0x0e5b);
}
static inline int
IS_SPACE(PRUnichar u)
static inline PRBool
IS_NONBREAKABLE_SPACE(PRUnichar u)
{
return ((u) == 0x0020 || (u) == 0x0009 || (u) == 0x000a || (u) == 0x000d || (u)==0x200b);
return u == 0x00A0 || u == 0x2007; // NO-BREAK SPACE, FIGURE SPACE
}
static PRInt8 GetClass(PRUnichar u)
// Returns non-zero when |u| is one of the hyphen characters that get
// contextual analysis: '-', ARMENIAN HYPHEN, HYPHEN or FIGURE DASH.
// NOTE(review): the change description also names U+2013 EN DASH, which is
// not tested here -- confirm that is intentional.
static inline PRBool
IS_HYPHEN(PRUnichar u)
{
  const PRBool isAsciiHyphen = (u == U_HYPHEN);
  const PRBool isUnicodeHyphen = (u == 0x058A) ||  // ARMENIAN HYPHEN
                                 (u == 0x2010) ||  // HYPHEN
                                 (u == 0x2012);    // FIGURE DASH
  return isAsciiHyphen || isUnicodeHyphen;
}
static PRInt8
GetClass(PRUnichar u)
{
PRUint16 h = u & 0xFF00;
PRUint16 l = u & 0x00ff;
PRInt8 c;
// Handle 3 range table first
if( 0x0000 == h)
{
if (0x0000 == h) {
c = GETCLASSFROMTABLE(gLBClass00, l);
}
else if( 0x0E00 == h)
{
} else if (0x0E00 == h) {
c = GETCLASSFROMTABLE(gLBClass0E, l);
}
else if( 0x2000 == h)
{
} else if (0x2000 == h) {
c = GETCLASSFROMTABLE(gLBClass20, l);
}
else if( 0x2100 == h)
{
} else if (0x2100 == h) {
c = GETCLASSFROMTABLE(gLBClass21, l);
}
else if( 0x3000 == h)
{
} else if (0x3000 == h) {
c = GETCLASSFROMTABLE(gLBClass30, l);
}
else if ( ( ( 0x3200 <= u) && ( u <= 0xA4CF) ) || // CJK and Yi
( ( 0xAC00 <= h) && ( h <= 0xD7FF) ) || // Hangul
( ( 0xf900 <= h) && ( h <= 0xfaff) )
)
{
c = 5; // CJK character, Han, and Han Compatability
}
else if( 0xff00 == h)
{
if( l < 0x0060) // Fullwidth ASCII variant
{
} else if (((0x3200 <= u) && (u <= 0xA4CF)) || // CJK and Yi
((0xAC00 <= h) && (h <= 0xD7FF)) || // Hangul
((0xf900 <= h) && (h <= 0xfaff))) {
c = CLASS_BREAKABLE; // CJK character, Han, and Han Compatability
} else if (0xff00 == h) {
if (l < 0x0060) { // Fullwidth ASCII variant
c = GETCLASSFROMTABLE(gLBClass00, (l+0x20));
} else if (l < 0x00a0) {
switch (l)
{
switch (l) {
case 0x61: c = GetClass(0x3002); break;
case 0x62: c = GetClass(0x300c); break;
case 0x63: c = GetClass(0x300d); break;
@ -296,53 +469,77 @@ static PRInt8 GetClass(PRUnichar u)
case 0x9e: c = GetClass(0x309b); break;
case 0x9f: c = GetClass(0x309c); break;
default:
if(IS_HALFWIDTH_IN_JISx4051_CLASS3(u))
c = 1; // jis x4051 class 3
if (IS_HALFWIDTH_IN_JISx4051_CLASS3(u))
c = CLASS_CLOSE; // jis x4051 class 3
else
c = 5; // jis x4051 class 11
c = CLASS_BREAKABLE; // jis x4051 class 11
break;
}
// Halfwidth Katakana variants
} else if( l < 0x00e0) {
c = 8; // Halfwidth Hangul variants
} else if( l < 0x00f0) {
static PRUnichar NarrowFFEx[16] =
{
// Halfwidth Katakana variants
} else if (l < 0x00e0) {
c = CLASS_CHARACTER; // Halfwidth Hangul variants
} else if (l < 0x00f0) {
static PRUnichar NarrowFFEx[16] = {
0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000
};
c = GetClass(NarrowFFEx[l - 0x00e0]);
} else {
c = 8;
c = CLASS_CHARACTER;
}
}
else if( 0x3100 == h) {
if ( l <= 0xbf) { // Hangul Compatibility Jamo, Bopomofo, Kanbun
// XXX: This is per UAX #14, but UAX #14 may change
// the line breaking rules about Kanbun and Bopomofo.
c = 5;
} else if (0x3100 == h) {
if (l <= 0xbf) { // Hangul Compatibility Jamo, Bopomofo, Kanbun
// XXX: This is per UAX #14, but UAX #14 may change
// the line breaking rules about Kanbun and Bopomofo.
c = CLASS_BREAKABLE;
} else if (l >= 0xf0) { // Katakana small letters for Ainu
c = CLASS_CLOSE;
} else { // unassigned
c = CLASS_CHARACTER;
}
else if ( l >= 0xf0)
{ // Katakana small letters for Ainu
c = 1;
}
else // unassigned
{
c = 8;
}
}
else {
c = 8; // others
} else if (0x0300 == h) {
if (0x4F == l || (0x5C <= l && l <= 0x62))
c = CLASS_NON_BREAKABLE;
else
c = CLASS_CHARACTER;
} else if (0x0500 == h) {
// ARMENIAN HYPHEN (for "Breaking Hyphens" of UAX#14)
if (l == 0x8A)
c = GETCLASSFROMTABLE(gLBClass00, PRUint16(U_HYPHEN));
else
c = CLASS_CHARACTER;
} else if (0x0F00 == h) {
if (0x08 == l || 0x0C == l || 0x12 == l)
c = CLASS_NON_BREAKABLE;
else
c = CLASS_CHARACTER;
} else if (0x1800 == h) {
if (0x0E == l)
c = CLASS_NON_BREAKABLE;
else
c = CLASS_CHARACTER;
} else {
c = CLASS_CHARACTER; // others
}
return c;
}
static PRBool GetPair(PRInt8 c1, PRInt8 c2)
static PRBool
GetPair(PRInt8 c1, PRInt8 c2)
{
NS_ASSERTION( c1 < MAX_CLASSES ,"illegal classes 1");
NS_ASSERTION( c2 < MAX_CLASSES ,"illegal classes 2");
NS_ASSERTION(c1 < MAX_CLASSES ,"illegal classes 1");
NS_ASSERTION(c2 < MAX_CLASSES ,"illegal classes 2");
return (0 == ((gPair[c1] >> c2 ) & 0x0001));
return (0 == ((gPair[c1] >> c2) & 0x0001));
}
// Looks up the conservative pair table: returns true when a break is allowed
// between a leading character of class c1 and a trailing character of class
// c2, i.e. when bit c2 of row c1 is clear.
static PRBool
GetPairConservative(PRInt8 c1, PRInt8 c2)
{
  NS_ASSERTION(c1 < MAX_CLASSES ,"illegal classes 1");
  NS_ASSERTION(c2 < MAX_CLASSES ,"illegal classes 2");

  const PRUint16 prohibitedAfterLeading = gPairConservative[c1];
  const int breakProhibited = (prohibitedAfterLeading >> c2) & 0x0001;
  return !breakProhibited;
}
nsJISx4051LineBreaker::nsJISx4051LineBreaker()
@ -355,81 +552,204 @@ nsJISx4051LineBreaker::~nsJISx4051LineBreaker()
NS_IMPL_ISUPPORTS1(nsJISx4051LineBreaker, nsILineBreaker)
#define U_PERIOD PRUnichar('.')
#define U_COMMA PRUnichar(',')
#define U_SEMICOLON PRUnichar(';')
#define U_SLASH PRUnichar('/')
#define U_SPACE PRUnichar(' ')
#define U_HYPHEN PRUnichar('-')
#define U_EQUAL PRUnichar('=')
#define U_NULL PRUnichar(0x0000)
#define U_RIGHT_SINGLE_QUOTATION_MARK PRUnichar(0x2019)
#define NEED_CONTEXTUAL_ANALYSIS(c) ((c) == U_PERIOD || \
(c) == U_COMMA || \
(c) == U_SEMICOLON || \
(c) == U_SLASH || \
(c) == U_HYPHEN || \
(c) == U_EQUAL || \
(c) == U_RIGHT_SINGLE_QUOTATION_MARK)
#define NUMERIC_CLASS 6 // JIS x4051 class 15 is now map to simplified class 6
#define CHARACTER_CLASS 8 // JIS x4051 class 18 is now map to simplified class 8
#define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)
class ContextState {
public:
ContextState(const PRUnichar* aText, PRUint32 aLength) {
mUniText = aText;
mText = nsnull;
mLength = aLength;
Init();
}
static PRInt8 ContextualAnalysis(
PRUnichar prev, PRUnichar cur, PRUnichar next)
ContextState(const PRUint8* aText, PRUint32 aLength) {
mUniText = nsnull;
mText = aText;
mLength = aLength;
Init();
}
PRUint32 Length() { return mLength; }
PRUint32 Index() { return mIndex; }
// Returns the character at aIndex, read from the 16-bit buffer (mUniText)
// when there is one and from the 8-bit buffer (mText) otherwise.
PRUnichar GetCharAt(PRUint32 aIndex) {
  // aIndex is unsigned, so a lower-bound check would always be true; only the
  // upper bound is meaningful.
  NS_ASSERTION(aIndex < mLength, "Out of range!");
  return mUniText ? mUniText[aIndex] : PRUnichar(mText[aIndex]);
}
void AdvanceIndexTo(PRUint32 aIndex) {
NS_ASSERTION(mIndex <= aIndex, "the index cannot decrease.");
NS_ASSERTION(aIndex < mLength, "out of range");
mIndex = aIndex;
}
void NotifyBreakBefore() { mLastBreakIndex = mIndex; }
// A word of western language should not be broken. But even if the word has
// only ASCII characters, non-natural context words should be broken, e.g.,
// URL and file path. For protecting the natural words, we should use
// conservative breaking rules at following conditions:
// 1. at near the start of word
// 2. at near the end of word
// 3. at near the latest broken point
// CONSERVATIVE_BREAK_RANGE define the 'near' in characters.
#define CONSERVATIVE_BREAK_RANGE 6
// Returns PR_TRUE when the conservative pair table should be used at
// position mIndex + aOffset.  Always PR_FALSE when the text contains a CJK
// character.  Otherwise PR_TRUE when the position is fewer than
// CONSERVATIVE_BREAK_RANGE characters from the start of the text, from the
// end of the text, or from the last recorded break, or when a non-breakable
// space lies within that range on either side of the position.
PRBool UseConservativeBreaking(PRUint32 aOffset = 0) {
if (mHasCJKChar)
return PR_FALSE;
PRUint32 index = mIndex + aOffset;
PRBool result = (index < CONSERVATIVE_BREAK_RANGE ||
mLength - index < CONSERVATIVE_BREAK_RANGE ||
index - mLastBreakIndex < CONSERVATIVE_BREAK_RANGE);
if (result || !mHasNonbreakableSpace)
return result;
// This text has a non-breakable space; check whether the index is near it.
// result is false here, so index is at least CONSERVATIVE_BREAK_RANGE and
// the unsigned subtraction below cannot wrap.
for (PRUint32 i = index - 1; index - CONSERVATIVE_BREAK_RANGE < i; --i) {
if (IS_NONBREAKABLE_SPACE(GetCharAt(i)))
return PR_TRUE;
if (i == 0)
break;
}
// result is false here, so mLength - index is at least
// CONSERVATIVE_BREAK_RANGE and i stays below mLength
// (assumes index <= mLength -- TODO confirm against callers).
for (PRUint32 i = index + 1; i < index + CONSERVATIVE_BREAK_RANGE; ++i) {
if (IS_NONBREAKABLE_SPACE(GetCharAt(i)))
return PR_TRUE;
}
return PR_FALSE;
}
// Returns PR_TRUE when aCh occurs anywhere before the current index, i.e. at
// one of the positions 0 .. mIndex - 1; PR_FALSE otherwise (including when
// mIndex is 0).
PRBool HasCharacterAlready(PRUnichar aCh) {
  // The index is unsigned, so count i down to 1 and read position i - 1.
  // This also examines position 0, which a "0 < i" loop over the positions
  // themselves never reaches.
  for (PRUint32 i = mIndex; i > 0; --i) {
    if (GetCharAt(i - 1) == aCh)
      return PR_TRUE;
  }
  return PR_FALSE;
}
// Returns the nearest character before the current index that is not a
// hyphen (per IS_HYPHEN), or U_NULL when every preceding character is a
// hyphen or the current index is 0.  The current character is expected to be
// a hyphen.
PRUnichar GetPreviousNonHyphenCharacter() {
  NS_ASSERTION(IS_HYPHEN(GetCharAt(mIndex)),
               "current character isn't hyphen");
  // The index is unsigned, so count i down to 1 and read position i - 1.
  // This also examines position 0, which a "0 < i" loop over the positions
  // themselves never reaches.
  for (PRUint32 i = mIndex; i > 0; --i) {
    PRUnichar ch = GetCharAt(i - 1);
    if (!IS_HYPHEN(ch))
      return ch;
  }
  return U_NULL;
}
private:
void Init() {
mIndex = 0;
mLastBreakIndex = 0;
mHasCJKChar = 0;
mHasNonbreakableSpace = 0;
for (PRUint32 i = 0; i < mLength; ++i) {
PRUnichar u = GetCharAt(i);
if (!mHasNonbreakableSpace && IS_NONBREAKABLE_SPACE(u))
mHasNonbreakableSpace = 1;
else if (mUniText && !mHasCJKChar && IS_CJK_CHAR(u))
mHasCJKChar = 1;
}
}
const PRUnichar* mUniText;
const PRUint8* mText;
PRUint32 mIndex;
PRUint32 mLength; // length of text
PRUint32 mLastBreakIndex;
PRPackedBool mHasCJKChar; // if the text has CJK character, this is true.
PRPackedBool mHasNonbreakableSpace; // if the text has no-breakable space,
// this is true.
};
static PRInt8
ContextualAnalysis(PRUnichar prev, PRUnichar cur, PRUnichar next,
ContextState &aState)
{
if(U_COMMA == cur || U_SEMICOLON == cur)
{
if((IS_ASCII_DIGIT(prev) || prev == U_NULL) && IS_ASCII_DIGIT(next))
return NUMERIC_CLASS;
}
else if(U_PERIOD == cur)
{
if((IS_ASCII_DIGIT(prev) || prev == U_SPACE || prev == U_NULL) &&
IS_ASCII_DIGIT(next))
return NUMERIC_CLASS;
// Don't return CLASS_OPEN/CLASS_CLOSE if aState.UseJISX4051 is FALSE.
// By assigning a full stop character class only when it's followed by
// class 6 (numeric), 7, and 8 (character). Note that class 9 (Thai)
// doesn't matter, either way, we prevent lines from breaking around
// full stop in those cases while still allowing it to end a line when
// followed by CJK characters. With an additional condition of it being
// preceded by class 0 or class > 5, we make sure that it does not
// start a line (see bug 164759).
PRUint8 pc = prev != U_NULL ? GetClass(prev) : CHARACTER_CLASS;
if((pc > 5 || pc == 0) && GetClass(next) > 5)
return CHARACTER_CLASS;
}
else if(U_SLASH == cur || U_HYPHEN == cur || U_EQUAL == cur)
{
// if slash is a first character, don't break at this point (e.g., "/root")
if (U_SLASH == cur && prev == U_NULL)
return CHARACTER_CLASS;
if (IS_ASCII_DIGIT(next))
return NUMERIC_CLASS;
}
else if(U_RIGHT_SINGLE_QUOTATION_MARK == cur)
{
// somehow people use this as ' in "it's" sometimes...
if(U_SPACE != next)
return CHARACTER_CLASS;
}
return GetClass(cur);
if (IS_HYPHEN(cur)) {
// If next character is hyphen, we don't need to break between them.
if (IS_HYPHEN(next))
return CLASS_CHARACTER;
// If prev and next characters are numeric, it may be in Math context.
// So, we should not break here.
PRBool prevIsNum = IS_ASCII_DIGIT(prev);
PRBool nextIsNum = IS_ASCII_DIGIT(next);
if (prevIsNum && nextIsNum)
return CLASS_NUMERIC;
// If one side is numeric and the other is a character, or if both sides are
// characters, the hyphen should be breakable.
if (!aState.UseConservativeBreaking(1)) {
PRUnichar prevOfHyphen = aState.GetPreviousNonHyphenCharacter();
if (prevOfHyphen && next) {
PRBool prevIsChar = !NEED_CONTEXTUAL_ANALYSIS(prevOfHyphen) &&
GetClass(prevOfHyphen) == CLASS_CHARACTER;
PRBool nextIsChar = !NEED_CONTEXTUAL_ANALYSIS(next) &&
GetClass(next) == CLASS_CHARACTER;
if ((prevIsNum || prevIsChar) && (nextIsNum || nextIsChar))
return CLASS_CLOSE;
}
}
} else if (cur == U_SLASH || cur == U_BACKSLASH) {
// If this is immediately after same char, we should not break here.
if (prev == cur)
return CLASS_CHARACTER;
// If this text has two or more (BACK)SLASHs, this may be file path or URL.
if (!aState.UseConservativeBreaking() &&
aState.HasCharacterAlready(cur))
return CLASS_OPEN;
} else if (cur == U_PERCENT) {
// If this is a part of the param of URL, we should break before.
if (!aState.UseConservativeBreaking()) {
if (aState.Index() >= 3 &&
aState.GetCharAt(aState.Index() - 3) == U_PERCENT)
return CLASS_OPEN;
if (aState.Index() + 3 < aState.Length() &&
aState.GetCharAt(aState.Index() + 3) == U_PERCENT)
return CLASS_OPEN;
}
} else if (cur == U_AMPERSAND) {
// If this may be a separator of params of URL, we should break after.
if (!aState.UseConservativeBreaking(1) &&
aState.HasCharacterAlready(U_EQUAL))
return CLASS_CLOSE;
} else {
NS_ERROR("Forgot to handle the current character!");
}
return GetClass(cur);
}
PRInt32 nsJISx4051LineBreaker::WordMove(
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos, PRInt8 aDirection)
PRInt32
nsJISx4051LineBreaker::WordMove(const PRUnichar* aText, PRUint32 aLen,
PRUint32 aPos, PRInt8 aDirection)
{
PRBool textNeedsJISx4051 = PR_FALSE;
PRInt32 begin, end;
for (begin = aPos; begin > 0 && !IS_SPACE(aText[begin - 1]); --begin) {
for (begin = aPos; begin > 0 && !NS_IsSpace(aText[begin - 1]); --begin) {
if (IS_CJK_CHAR(aText[begin]) || IS_COMPLEX(aText[begin])) {
textNeedsJISx4051 = PR_TRUE;
}
}
for (end = aPos + 1; end < PRInt32(aLen) && !IS_SPACE(aText[end]); ++end) {
for (end = aPos + 1; end < PRInt32(aLen) && !NS_IsSpace(aText[end]); ++end) {
if (IS_CJK_CHAR(aText[end]) || IS_COMPLEX(aText[end])) {
textNeedsJISx4051 = PR_TRUE;
}
@ -458,8 +778,9 @@ PRInt32 nsJISx4051LineBreaker::WordMove(
return ret;
}
PRInt32 nsJISx4051LineBreaker::Next(
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
PRInt32
nsJISx4051LineBreaker::Next(const PRUnichar* aText, PRUint32 aLen,
PRUint32 aPos)
{
NS_ASSERTION(aText, "aText shouldn't be null");
NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
@ -468,8 +789,9 @@ PRInt32 nsJISx4051LineBreaker::Next(
return nextPos < PRInt32(aLen) ? nextPos : NS_LINEBREAKER_NEED_MORE_TEXT;
}
PRInt32 nsJISx4051LineBreaker::Prev(
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
PRInt32
nsJISx4051LineBreaker::Prev(const PRUnichar* aText, PRUint32 aLen,
PRUint32 aPos)
{
NS_ASSERTION(aText, "aText shouldn't be null");
NS_ASSERTION(aLen >= aPos, "Illegal value (length >= position)");
@ -483,16 +805,19 @@ nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar* aChars, PRUint32 aLeng
PRPackedBool* aBreakBefore)
{
PRUint32 cur;
PRInt8 lastClass = -1;
PRInt8 lastClass = CLASS_NONE;
ContextState state(aChars, aLength);
for (cur = 0; cur < aLength; ++cur) {
PRUnichar ch = aChars[cur];
PRInt8 cl;
state.AdvanceIndexTo(cur);
if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL,
ch,
cur + 1 < aLength ? aChars[cur + 1] : U_NULL);
cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
state);
} else {
cl = GetClass(ch);
}
@ -501,11 +826,16 @@ nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar* aChars, PRUint32 aLeng
if (cur > 0) {
NS_ASSERTION(CLASS_COMPLEX != lastClass || CLASS_COMPLEX != cl,
"Loop should have prevented adjacent complex chars here");
allowBreak = GetPair(lastClass, cl);
if (state.UseConservativeBreaking())
allowBreak = GetPairConservative(lastClass, cl);
else
allowBreak = GetPair(lastClass, cl);
} else {
allowBreak = PR_FALSE;
}
aBreakBefore[cur] = allowBreak;
if (allowBreak)
state.NotifyBreakBefore();
lastClass = cl;
if (CLASS_COMPLEX == cl) {
PRUint32 end = cur + 1;
@ -530,27 +860,35 @@ nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUint8* aChars, PRUint32 aLength
PRPackedBool* aBreakBefore)
{
PRUint32 cur;
PRInt8 lastClass = -1;
PRInt8 lastClass = CLASS_NONE;
ContextState state(aChars, aLength);
for (cur = 0; cur < aLength; ++cur) {
PRUnichar ch = aChars[cur];
PRInt8 cl;
state.AdvanceIndexTo(cur);
if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL,
ch,
cur + 1 < aLength ? aChars[cur + 1] : U_NULL);
cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
state);
} else {
cl = GetClass(ch);
}
PRBool allowBreak;
if (cur > 0) {
allowBreak = GetPair(lastClass, cl);
if (state.UseConservativeBreaking())
allowBreak = GetPairConservative(lastClass, cl);
else
allowBreak = GetPair(lastClass, cl);
} else {
allowBreak = PR_FALSE;
}
aBreakBefore[cur] = allowBreak;
if (allowBreak)
state.NotifyBreakBefore();
lastClass = cl;
}
}

Просмотреть файл

@ -53,11 +53,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>14</TD>
<TD>3</TD>
<TD></TD>
<TD BGCOLOR=white>17</TD>
<TD>12</TD>
<TD>1</TD>
<TD></TD>
<TD BGCOLOR=white>13</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -78,24 +77,25 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD>13</TD>
<TD>2</TD>
<TD></TD>
<TD>11</TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>01_[a]<TH>
<TD></TD>
<TD>31</TD>
<TD>2</TD>
<TD></TD>
<TD>32</TD>
<TD>6</TD>
<TD>28</TD>
<TD>3</TD>
<TD></TD>
<TD BGCOLOR=white>71</TD>
<TD BGCOLOR=white>64</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -112,17 +112,17 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>2</TD>
<TD>14</TD>
<TD></TD>
<TD></TD>
<TD>16</TD>
<TD></TD>
<TD></TD>
<TD>2</TD>
<TD>3</TD>
<TD>4</TD>
<TD>12</TD>
<TD>1</TD>
<TD></TD>
<TD>11</TD>
<TD></TD>
<TD></TD>
<TD>2</TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
@ -131,9 +131,48 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>4</TD>
<TD></TD>
<TD></TD>
<TD BGCOLOR=white>4</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>3</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>03_8<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD BGCOLOR=white>1</TD>
<TD></TD>
<TD></TD>
@ -151,7 +190,6 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -161,44 +199,6 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>03_8<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>3</TD>
<TD></TD>
<TD BGCOLOR=white>3</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>2</TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
@ -209,8 +209,8 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>4</TD>
<TD>1</TD>
<TD>5</TD>
<TD></TD>
<TD></TD>
<TD BGCOLOR=white>5</TD>
<TD></TD>
@ -233,9 +233,9 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>4</TD>
<TD>5</TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -245,20 +245,20 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
</TR>
<TR><TH>05_[b]<TH>
<TD>33</TD>
<TD>154</TD>
<TD>153</TD>
<TD></TD>
<TD>53</TD>
<TD>33</TD>
<TD>2</TD>
<TD>305</TD>
<TD>13</TD>
<TD BGCOLOR=white>560</TD>
<TD>5</TD>
<TD>12</TD>
<TD BGCOLOR=white>238</TD>
<TD>32</TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>154</TD>
<TD>153</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -266,7 +266,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD>33</TD>
<TD>20</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -277,10 +277,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>305</TD>
<TD>5</TD>
<TD></TD>
<TD></TD>
<TD>13</TD>
<TD>12</TD>
</TR>
<TR><TH>06_15<TH>
<TD></TD>
@ -321,85 +321,46 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
</TR>
<TR><TH>07_16<TH>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD BGCOLOR=white>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>08_18<TH>
<TD>10</TD>
<TD>659</TD>
<TD>4</TD>
<TD>130</TD>
<TD>56</TD>
<TD>941</TD>
<TD>2</TD>
<TD BGCOLOR=white>1802</TD>
<TD></TD>
<TD>10</TD>
<TD></TD>
<TD></TD>
<TD>368</TD>
<TD>1</TD>
<TD>4</TD>
<TD></TD>
<TD>286</TD>
<TD></TD>
<TD></TD>
<TD>4</TD>
<TR><TH>07_18<TH>
<TD>19</TD>
<TD>157</TD>
<TD></TD>
<TD>33</TD>
<TD>54</TD>
<TD>125</TD>
<TD>3</TD>
<TD>127</TD>
<TD>3</TD>
<TD BGCOLOR=white>391</TD>
<TD></TD>
<TD>19</TD>
<TD></TD>
<TD></TD>
<TD>67</TD>
<TD>5</TD>
<TD>3</TD>
<TD>4</TD>
<TD>6</TD>
<TD></TD>
<TD>81</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>3</TD>
<TD>30</TD>
<TD>5</TD>
<TD>12</TD>
<TD>10</TD>
<TD>273</TD>
<TD>646</TD>
<TD>1</TD>
<TD>1</TD>
<TD>4</TD>
<TD>2</TD>
<TD>2</TD>
<TD>2</TD>
<TD>4</TD>
<TD>36</TD>
<TD>4</TD>
<TD></TD>
<TD>3</TD>
<TD>23</TD>
<TD>99</TD>
<TD>1</TD>
<TD>1</TD>
<TD>1</TD>
</TR>
<TR><TH>09_COMPLEX<TH>
<TR><TH>08_COMPLEX<TH>
<TD></TD>
<TD></TD>
<TD></TD>
@ -438,6 +399,123 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
</TR>
<TR><TH>09_[c]<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>3</TD>
<TD>4</TD>
<TD></TD>
<TD BGCOLOR=white>7</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>3</TD>
<TD>2</TD>
<TD></TD>
<TD>2</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>0A_[d]<TH>
<TD>1</TD>
<TD>2</TD>
<TD></TD>
<TD>6</TD>
<TD>21</TD>
<TD>16</TD>
<TD></TD>
<TD BGCOLOR=white>46</TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD>2</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>6</TD>
<TD></TD>
<TD></TD>
<TD>3</TD>
<TD>1</TD>
<TD>1</TD>
<TD>16</TD>
<TD></TD>
<TD>2</TD>
<TD>3</TD>
<TD>7</TD>
<TD>4</TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>0B_[e]<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD>1</TD>
<TD>3</TD>
<TD BGCOLOR=white>5</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>3</TD>
</TR>
<TR><TH>X<TH>
<TD></TD>
<TD></TD>
@ -487,74 +565,26 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD BGCOLOR=red>04_9</TD>
<TD BGCOLOR=red>05_[b]</TD>
<TD BGCOLOR=red>06_15</TD>
<TD BGCOLOR=red>07_16</TD>
<TD BGCOLOR=red>08_18</TD>
<TD BGCOLOR=red>09_COMPLEX</TD>
<TD BGCOLOR=red>07_18</TD>
<TD BGCOLOR=red>08_COMPLEX</TD>
<TD BGCOLOR=red>09_[c]</TD>
<TD BGCOLOR=red>0A_[d]</TD>
<TD BGCOLOR=red>0B_[e]</TD>
<TD BGCOLOR=red>X</TD>
</TR>
<TR><TH>00<TH>
<TD>6</TD>
<TD>14</TD>
<TD></TD>
<TD>2</TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>33</TD>
<TD>10</TD>
<TD>126</TD>
<TD></TD>
<TD>156</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>01<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>128</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>02<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>89</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>03<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>76</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>04<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>226</TD>
<TD></TD>
<TD>7</TD>
<TD>44</TD>
<TD>2</TD>
<TD></TD>
</TR>
<TR><TH>0E<TH>
@ -565,22 +595,26 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD>20</TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>20<TH>
<TD>2</TD>
<TD>11</TD>
<TD>4</TD>
<TD></TD>
<TD>5</TD>
<TD>1</TD>
<TD>12</TD>
<TD></TD>
<TD>4</TD>
<TD>13</TD>
<TD>101</TD>
<TD></TD>
<TD></TD>
<TD>86</TD>
<TD></TD>
<TD>2</TD>
<TD>3</TD>
<TD></TD>
</TR>
<TR><TH>21<TH>
@ -591,88 +625,12 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD>32</TD>
<TD></TD>
<TD>1</TD>
<TD>162</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>22<TH>
<TD>163</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>242</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>23<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>24<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>139</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>25<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>230</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>26<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>106</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>27<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>160</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>30<TH>
<TD>10</TD>
@ -686,43 +644,6 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>32<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>132</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>33<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>188</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>4E<TH>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>256</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
</TR>

Просмотреть файл

@ -396,11 +396,11 @@ printf "[%s || %s]\n", $r, $def;
}
print HEADER "};\n\n";
}
printarray("00", "8");
printarray("20", "8");
printarray("21", "8");
printarray("00", "7");
printarray("20", "7");
printarray("21", "7");
printarray("30", "5");
printarray("0E", "9");
printarray("0E", "8");
#print %rangecount;

Просмотреть файл

@ -1,7 +1,85 @@
0028;;1
002F;;2
005B;;1
007B;;1
0000;001f;17
0020;;17
0024;;24
0027;;18
0028;;22
002D;;18
002F;;18
0021;002F;23
0030;0039;15
003C;;22
003F;;4
003A;003F;23
0040;;18
0041;005A;18
005B;;22
005E;;18
005F;;18
005B;005F;23
0060;;18
0061;007A;18
007B;;22
007B;007E;23
00A0;;24
00A3;;22
00A5;;22
00A9;;18
00AA;;18
00AC;;22
00AE;;18
00AF;;18
00A1;00BF;23
00B0;;18
00F7;;23
00C0;00FF;18
0E3F;;1
0E2F;;4
0E46;;4
0E5A;0E5B;4
0E50;0E59;15
0E4F;;18
0EAF;;4
0EC6;;4
0ED0;0ED9;15
2007;;24
2000;200B;17
200C;200F;18
2010;;2
2011;;24
2012;2013;2
2014;;7
2015;;18
2016;2017;18
2018;201F;18
2020;2023;18
2024;2026;7
2027;;23
2028;202E;18
202F;;24
2030;2034;9
2035;2038;18
2039;;1
203A;;2
203B;;12
203C;203D;3
203E;;23
203F;2043;18
2044;;3
2045;;1
2046;;2
2047;2049;3
204A;2063;18
206A;206F;18
2070;2071;18
2074;208E;18
2090;2094;18
2116;;8
2160;217F;12
2190;21EA;a12
2126;;18
2100;2138;18
2153;2182;18
2190;21EA;18
3008;;1
300A;;1
300C;;1
@ -12,10 +90,6 @@
3018;;1
301A;;1
301D;;1
0029;;2
002C;;2
005D;;2
007D;;2
3001;;2
3009;;2
300B;;2
@ -28,8 +102,6 @@
301B;;2
301E;;2
301F;;2
203C;;3
2044;;3
301C;;3
3041;;3
3043;;3
@ -58,65 +130,11 @@
30FC;;3
30FD;;3
30FE;;3
0021;;4
003F;;4
003A;;18
003B;;5
30FB;;5
002E;;6
3002;;6
2014;;7
2024;;2
2025;;2
2026;;2
0024;;1
005C;;1
00A3;;8
00A5;;8
2116;;8
0025;;2
00A2;;9
00B0;;2
2030;;9
2031;;9
2032;;9
2033;;9
3000;;10
3042;3094;11
3099;309E;3
002B;;18
002D;;2
003C;;1
003D;;2
003E;;2
00A7;;18
00A9;;18
00AE;;18
00B1;;18
00B6;;18
00D7;;18
00F7;;18
203B;;12
2160;217F;12
2190;21EA;a12
2460;24EA;a12
2500;257F;a12
2580;2595;a12
25A0;25EF;a12
2600;2613;a12
261A;266F;a12
2701;2704;a12
2706;2709;a12
270C;2727;a12
2729;274B;a12
274D;;a12
274F;2752;a12
2756;;a12
2758;275E;a12
2761;2767;a12
2776;2794;a12
2798;27AF;a12
27B1;27BE;a12
3003;;12
3004;;12
3006;;12
@ -126,76 +144,3 @@
3020;;12
3036;;12
30A2;30FA;12
3220;3243;12
3280;32B0;12
32D0;32FE;12
3300;3357;12
3371;3376;12
3380;33DD;12
4E00;9F45;12
0030;0039;15
2126;;16
0020;;17
0000;001f;17
0021;007E;18
00A1;00FF;18
0100;017F;18
0250;02A8;18
0374;0375;18
037A;;18
037E;;18
0384;038A;18
038C;;18
038E;03A1;18
03A3;03CE;18
0401;040C;18
040E;044F;18
0451;045C;18
045E;047F;18
0480;0486;18
0480;0486;18
0490;04C4;18
04C7;04C8;18
04CB;04CC;18
04D0;04EB;18
04EE;04F5;18
04F8;04F9;18
2000;200B;17
200C;202E;18
2030;2046;18
2070;;18
2074;208E;18
20A0;20AA;18
2100;2138;18
2153;2182;18
2190;21EA;18
2200;227F;18
2280;22F1;18
2312;;18
2460;24EA;18
2500;257F;18
2580;2595;18
25A0;25EF;18
2600;2613;18
261A;266F;18
2701;2704;18
2706;2709;18
270C;2727;18
2729;274B;18
274D;;18
274F;2752;18
2756;;18
2758;275E;18
2761;2767;18
2776;2794;18
2798;27AF;18
27B1;27BE;18
0E3F;;1
0E2F;;4
0E46;;4
0E5A;0E5B;4
0E50;0E59;15
0E4F;;18
0EAF;;4
0EC6;;4
0ED0;0ED9;15

Просмотреть файл

@ -13,9 +13,12 @@
13;X
14;X
15;06_15
16;07_16
16;X
17;05_[b]
18;08_18
18;07_18
19;X
20;X
21;09_COMPLEX
21;08_COMPLEX
22;09_[c]
23;0A_[d]
24;0B_[e]

Просмотреть файл

@ -21,99 +21,102 @@ td {
</head>
<body>
<p>This is a specification table for line breaking.</p>
<p>The value 'A' means the line breakable After the character, and 'B' means Before. 'BA' means Before and After.</p>
<p>(C) which is the tail of the browser name means Character. (N) means Numeric.
This means that they are around the character. E.g., "a$a" is a testcase for (C), "0$0" is a testcase for (N).</p>
<p>In the IE7 and Opera9 columns, 'A' means that the line is breakable After the character, and 'B' means Before. 'BA' means both Before and After.</p>
<p>The suffix (C) on the IE7 and Opera9 column names means Character, and (N) means Numeric.
It indicates which kind of characters surrounded the tested character. E.g., "a$a" is a testcase for (C), "0$0" is a testcase for (N).</p>
<p>Gecko does not break lines in most Western-language contexts. But for file paths, URLs and very long words that are connected by hyphens,
some characters might be breakable. They are marked 'breakable' in the table. However, they are not always breakable;
whether they are <em>depends on the context</em> in the word.</p>
<table border="1">
<thead>
<tr><th colspan="2">character</th><th>Gecko(C)</th><th>Gecko(N)</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
</thead>
<tfoot>
<tr><th colspan="2">character</th><th>Gecko(C)</th><th>Gecko(N)</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
</tfoot>
<tbody>
<tr><th>0x21</th><th>&#x21;</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x22</th><th>&#x22;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x23</th><th>&#x23;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x24</th><th>&#x24;</th><td>B</td><td>B</td><td></td><td>B</td><td></td><td></td></tr>
<tr><th>0x25</th><th>&#x25;</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x26</th><th>&#x26;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x27</th><th>&#x27;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x28</th><th>&#x28;</th><td>B</td><td>B</td><td>B</td><td>B</td><td></td><td></td></tr>
<tr><th>0x29</th><th>&#x29;</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x2A</th><th>&#x2A;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x2B</th><th>&#x2B;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x2C</th><th>&#x2C;</th><td>A</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x2D</th><th>&#x2D;</th><td>A</td><td></td><td>BA</td><td>BA</td><td>A</td><td>A</td></tr>
<tr><th>0x2E</th><th>&#x2E;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x2F</th><th>&#x2F;</th><td>A</td><td></td><td></td><td></td><td>A</td><td>A</td></tr>
<tr><th>0x21</th><th>&#x21;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x22</th><th>&#x22;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x23</th><th>&#x23;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x24</th><th>&#x24;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
<tr><th>0x25</th><th>&#x25;</th><td>breakable</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x26</th><th>&#x26;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x27</th><th>&#x27;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x28</th><th>&#x28;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
<tr><th>0x29</th><th>&#x29;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x2A</th><th>&#x2A;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x2B</th><th>&#x2B;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x2C</th><th>&#x2C;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x2D</th><th>&#x2D;</th><td>breakable</td><td>BA</td><td>BA</td><td>A</td><td>A</td></tr>
<tr><th>0x2E</th><th>&#x2E;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x2F</th><th>&#x2F;</th><td>breakable</td><td></td><td></td><td>A</td><td>A</td></tr>
</tbody>
<tbody>
<tr><th>0x3A</th><th>&#x3A;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3B</th><th>&#x3B;</th><td>A</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3C</th><th>&#x3C;</th><td>B</td><td>B</td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3D</th><th>&#x3D;</th><td>A</td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3E</th><th>&#x3E;</th><td>A</td><td>A</td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3F</th><th>&#x3F;</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x3A</th><th>&#x3A;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3B</th><th>&#x3B;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3C</th><th>&#x3C;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3D</th><th>&#x3D;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3E</th><th>&#x3E;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x3F</th><th>&#x3F;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
</tbody>
<tbody>
<tr><th>0x40</th><th>&#x40;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x40</th><th>&#x40;</th><td></td><td></td><td></td><td></td><td></td></tr>
</tbody>
<tbody>
<tr><th>0x5B</th><th>&#x5B;</th><td>B</td><td>B</td><td>B</td><td>B</td><td></td><td></td></tr>
<tr><th>0x5C</th><th>&#x5C;</th><td>B</td><td>B</td><td></td><td>B</td><td></td><td></td></tr>
<tr><th>0x5D</th><th>&#x5D;</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x5E</th><th>&#x5E;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x5F</th><th>&#x5F;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x5B</th><th>&#x5B;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
<tr><th>0x5C</th><th>&#x5C;</th><td>breakable</td><td></td><td>B</td><td></td><td></td></tr>
<tr><th>0x5D</th><th>&#x5D;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x5E</th><th>&#x5E;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x5F</th><th>&#x5F;</th><td></td><td></td><td></td><td></td><td></td></tr>
</tbody>
<tbody>
<tr><th>0x60</th><th>&#x60;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x60</th><th>&#x60;</th><td></td><td></td><td></td><td></td><td></td></tr>
</tbody>
<tbody>
<tr><th>0x7B</th><th>&#x7B;</th><td>B</td><td>B</td><td>B</td><td>B</td><td></td><td></td></tr>
<tr><th>0x7C</th><th>&#x7C;</th><td></td><td></td><td></td><td></td><td>A</td><td>A</td></tr>
<tr><th>0x7D</th><th>&#x7D;</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x7E</th><th>&#x7E;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0x7B</th><th>&#x7B;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
<tr><th>0x7C</th><th>&#x7C;</th><td></td><td></td><td></td><td>A</td><td>A</td></tr>
<tr><th>0x7D</th><th>&#x7D;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0x7E</th><th>&#x7E;</th><td></td><td></td><td></td><td></td><td></td></tr>
</tbody>
<tbody>
<tr><th>0xA1</th><th>&#xA1;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA2</th><th>&#xA2;</th><td>BA</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0xA3</th><th>&#xA3;</th><td>BA</td><td>B</td><td></td><td>B</td><td></td><td></td></tr>
<tr><th>0xA4</th><th>&#xA4;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA5</th><th>&#xA5;</th><td>BA</td><td>B</td><td></td><td>B</td><td></td><td></td></tr>
<tr><th>0xA6</th><th>&#xA6;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA7</th><th>&#xA7;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA8</th><th>&#xA8;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA9</th><th>&#xA9;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAA</th><th>&#xAA;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAB</th><th>&#xAB;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAC</th><th>&#xAC;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAE</th><th>&#xAE;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAF</th><th>&#xAF;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA1</th><th>&#xA1;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA2</th><th>&#xA2;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0xA3</th><th>&#xA3;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
<tr><th>0xA4</th><th>&#xA4;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA5</th><th>&#xA5;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
<tr><th>0xA6</th><th>&#xA6;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA7</th><th>&#xA7;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA8</th><th>&#xA8;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xA9</th><th>&#xA9;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAA</th><th>&#xAA;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAB</th><th>&#xAB;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAC</th><th>&#xAC;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAE</th><th>&#xAE;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xAF</th><th>&#xAF;</th><td></td><td></td><td></td><td></td><td></td></tr>
</tbody>
<tbody>
<tr><th>0xB0</th><th>&#xB0;</th><td>A</td><td>A</td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0xB1</th><th>&#xB1;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB2</th><th>&#xB2;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB3</th><th>&#xB3;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB4</th><th>&#xB4;</th><td></td><td></td><td></td><td></td><td>B</td><td>B</td></tr>
<tr><th>0xB5</th><th>&#xB5;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB6</th><th>&#xB6;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB7</th><th>&#xB7;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB8</th><th>&#xB8;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB9</th><th>&#xB9;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBA</th><th>&#xBA;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBB</th><th>&#xBB;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBC</th><th>&#xBC;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBD</th><th>&#xBD;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBE</th><th>&#xBE;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBF</th><th>&#xBF;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB0</th><th>&#xB0;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
<tr><th>0xB1</th><th>&#xB1;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB2</th><th>&#xB2;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB3</th><th>&#xB3;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB4</th><th>&#xB4;</th><td></td><td></td><td></td><td>B</td><td>B</td></tr>
<tr><th>0xB5</th><th>&#xB5;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB6</th><th>&#xB6;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB7</th><th>&#xB7;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB8</th><th>&#xB8;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xB9</th><th>&#xB9;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBA</th><th>&#xBA;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBB</th><th>&#xBB;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBC</th><th>&#xBC;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBD</th><th>&#xBD;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBE</th><th>&#xBE;</th><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xBF</th><th>&#xBF;</th><td></td><td></td><td></td><td></td><td></td></tr>
</tbody>
<tbody>
<tr><th>0xD7</th><th>&#xD7;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xD7</th><th>&#xD7;</th><td></td><td></td><td></td><td></td><td></td></tr>
</tbody>
<tbody>
<tr><th>0xF7</th><th>&#xF7;</th><td></td><td></td><td></td><td></td><td></td><td></td></tr>
<tr><th>0xF7</th><th>&#xF7;</th><td></td><td></td><td></td><td></td><td></td></tr>
</tbody>
</table>
</body>