* unicode.c (onigenc_unicode_property_name_to_ctype):

ignore case of properties.

* tool/enc-unicode.rb: downcase properties list.

* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
  follow above.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@24836 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2009-09-10 22:54:01 +00:00
Родитель e519c9d960
Коммит ee4b59a419
7 изменённых файлов: 1312 добавлений и 1223 удалений

Просмотреть файл

@ -1,3 +1,14 @@
Fri Sep 11 07:52:43 2009 NARUSE, Yui <naruse@ruby-lang.org>
* unicode.c (onigenc_unicode_property_name_to_ctype):
ignore case of properties.
* tool/enc-unicode.rb: downcase properties list.
* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
follow above.
Fri Sep 11 05:00:19 2009 Koichi Sasada <ko1@atdot.net>
* include/ruby/ruby.h (rb_data_type_t): Add comments.

Просмотреть файл

@ -2093,16 +2093,16 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end
p = name;
len = 0;
while (p < end) {
for (p = name; p < end; p += enclen(enc, p, end)) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (code == ' ' || code == '-' || code == '_')
continue;
if (code >= 0x80)
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
buf[len++] = (UChar )code;
buf[len++] = (UChar )TOLOWER((unsigned char)code);
if (len >= PROPERTY_NAME_MAX_SIZE)
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
p += enclen(enc, p, end);
}
buf[len] = 0;

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
%}
struct uniname2ctype_struct;
%%
NEWLINE, 0
Alpha, 1
Blank, 2
Cntrl, 3
Digit, 4
Graph, 5
Lower, 6
Print, 7
Punct, 8
Space, 9
Upper, 10
XDigit, 11
Word, 12
Alnum, 13
ASCII, 14
newline, 0
alpha, 1
blank, 2
cntrl, 3
digit, 4
graph, 5
lower, 6
print, 7
punct, 8
space, 9
upper, 10
xdigit, 11
word, 12
alnum, 13
ascii, 14
#ifdef USE_UNICODE_PROPERTIES
Any, 15
Assigned, 16
C, 17
Cc, 18
Cf, 19
Cn, 20
Co, 21
Cs, 22
L, 23
Ll, 24
Lm, 25
Lo, 26
Lt, 27
Lu, 28
M, 29
Mc, 30
Me, 31
Mn, 32
N, 33
Nd, 34
Nl, 35
No, 36
P, 37
Pc, 38
Pd, 39
Pe, 40
Pf, 41
Pi, 42
Po, 43
Ps, 44
S, 45
Sc, 46
Sk, 47
Sm, 48
So, 49
Z, 50
Zl, 51
Zp, 52
Zs, 53
Common, 54
Latin, 55
Greek, 56
Cyrillic, 57
Armenian, 58
Hebrew, 59
Arabic, 60
Syriac, 61
Thaana, 62
Devanagari, 63
Bengali, 64
Gurmukhi, 65
Gujarati, 66
Oriya, 67
Tamil, 68
Telugu, 69
Kannada, 70
Malayalam, 71
Sinhala, 72
Thai, 73
Lao, 74
Tibetan, 75
Myanmar, 76
Georgian, 77
Hangul, 78
Ethiopic, 79
Cherokee, 80
Canadian_Aboriginal, 81
Ogham, 82
Runic, 83
Khmer, 84
Mongolian, 85
Hiragana, 86
Katakana, 87
Bopomofo, 88
Han, 89
Yi, 90
Old_Italic, 91
Gothic, 92
Deseret, 93
Inherited, 94
Tagalog, 95
Hanunoo, 96
Buhid, 97
Tagbanwa, 98
Limbu, 99
Tai_Le, 100
Linear_B, 101
Ugaritic, 102
Shavian, 103
Osmanya, 104
Cypriot, 105
Braille, 106
Buginese, 107
Coptic, 108
New_Tai_Lue, 109
Glagolitic, 110
Tifinagh, 111
Syloti_Nagri, 112
Old_Persian, 113
Kharoshthi, 114
Balinese, 115
Cuneiform, 116
Phoenician, 117
Phags_Pa, 118
Nko, 119
Sundanese, 120
Lepcha, 121
Ol_Chiki, 122
Vai, 123
Saurashtra, 124
Kayah_Li, 125
Rejang, 126
Lycian, 127
Carian, 128
Lydian, 129
Cham, 130
any, 15
assigned, 16
c, 17
cc, 18
cf, 19
cn, 20
co, 21
cs, 22
l, 23
ll, 24
lm, 25
lo, 26
lt, 27
lu, 28
m, 29
mc, 30
me, 31
mn, 32
n, 33
nd, 34
nl, 35
no, 36
p, 37
pc, 38
pd, 39
pe, 40
pf, 41
pi, 42
po, 43
ps, 44
s, 45
sc, 46
sk, 47
sm, 48
so, 49
z, 50
zl, 51
zp, 52
zs, 53
common, 54
latin, 55
greek, 56
cyrillic, 57
armenian, 58
hebrew, 59
arabic, 60
syriac, 61
thaana, 62
devanagari, 63
bengali, 64
gurmukhi, 65
gujarati, 66
oriya, 67
tamil, 68
telugu, 69
kannada, 70
malayalam, 71
sinhala, 72
thai, 73
lao, 74
tibetan, 75
myanmar, 76
georgian, 77
hangul, 78
ethiopic, 79
cherokee, 80
canadianaboriginal, 81
ogham, 82
runic, 83
khmer, 84
mongolian, 85
hiragana, 86
katakana, 87
bopomofo, 88
han, 89
yi, 90
olditalic, 91
gothic, 92
deseret, 93
inherited, 94
tagalog, 95
hanunoo, 96
buhid, 97
tagbanwa, 98
limbu, 99
taile, 100
linearb, 101
ugaritic, 102
shavian, 103
osmanya, 104
cypriot, 105
braille, 106
buginese, 107
coptic, 108
newtailue, 109
glagolitic, 110
tifinagh, 111
sylotinagri, 112
oldpersian, 113
kharoshthi, 114
balinese, 115
cuneiform, 116
phoenician, 117
phagspa, 118
nko, 119
sundanese, 120
lepcha, 121
olchiki, 122
vai, 123
saurashtra, 124
kayahli, 125
rejang, 126
lycian, 127
carian, 128
lydian, 129
cham, 130
#endif /* USE_UNICODE_PROPERTIES */
%%
static int

Просмотреть файл

@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
%}
struct uniname2ctype_struct;
%%
NEWLINE, 0
Alpha, 1
Blank, 2
Cntrl, 3
Digit, 4
Graph, 5
Lower, 6
Print, 7
Punct, 8
Space, 9
Upper, 10
XDigit, 11
Word, 12
Alnum, 13
ASCII, 14
newline, 0
alpha, 1
blank, 2
cntrl, 3
digit, 4
graph, 5
lower, 6
print, 7
punct, 8
space, 9
upper, 10
xdigit, 11
word, 12
alnum, 13
ascii, 14
#ifdef USE_UNICODE_PROPERTIES
Any, 15
Assigned, 16
C, 17
Cc, 18
Cf, 19
Cn, 20
Co, 21
Cs, 22
L, 23
Ll, 24
Lm, 25
Lo, 26
Lt, 27
Lu, 28
M, 29
Mc, 30
Me, 31
Mn, 32
N, 33
Nd, 34
Nl, 35
No, 36
P, 37
Pc, 38
Pd, 39
Pe, 40
Pf, 41
Pi, 42
Po, 43
Ps, 44
S, 45
Sc, 46
Sk, 47
Sm, 48
So, 49
Z, 50
Zl, 51
Zp, 52
Zs, 53
Common, 54
Latin, 55
Greek, 56
Cyrillic, 57
Armenian, 58
Hebrew, 59
Arabic, 60
Syriac, 61
Thaana, 62
Devanagari, 63
Bengali, 64
Gurmukhi, 65
Gujarati, 66
Oriya, 67
Tamil, 68
Telugu, 69
Kannada, 70
Malayalam, 71
Sinhala, 72
Thai, 73
Lao, 74
Tibetan, 75
Myanmar, 76
Georgian, 77
Hangul, 78
Ethiopic, 79
Cherokee, 80
Canadian_Aboriginal, 81
Ogham, 82
Runic, 83
Khmer, 84
Mongolian, 85
Hiragana, 86
Katakana, 87
Bopomofo, 88
Han, 89
Yi, 90
Old_Italic, 91
Gothic, 92
Deseret, 93
Inherited, 94
Tagalog, 95
Hanunoo, 96
Buhid, 97
Tagbanwa, 98
Limbu, 99
Tai_Le, 100
Linear_B, 101
Ugaritic, 102
Shavian, 103
Osmanya, 104
Cypriot, 105
Braille, 106
Buginese, 107
Coptic, 108
New_Tai_Lue, 109
Glagolitic, 110
Tifinagh, 111
Syloti_Nagri, 112
Old_Persian, 113
Kharoshthi, 114
Balinese, 115
Cuneiform, 116
Phoenician, 117
Phags_Pa, 118
Nko, 119
Sundanese, 120
Lepcha, 121
Ol_Chiki, 122
Vai, 123
Saurashtra, 124
Kayah_Li, 125
Rejang, 126
Lycian, 127
Carian, 128
Lydian, 129
Cham, 130
any, 15
assigned, 16
c, 17
cc, 18
cf, 19
cn, 20
co, 21
cs, 22
l, 23
ll, 24
lm, 25
lo, 26
lt, 27
lu, 28
m, 29
mc, 30
me, 31
mn, 32
n, 33
nd, 34
nl, 35
no, 36
p, 37
pc, 38
pd, 39
pe, 40
pf, 41
pi, 42
po, 43
ps, 44
s, 45
sc, 46
sk, 47
sm, 48
so, 49
z, 50
zl, 51
zp, 52
zs, 53
common, 54
latin, 55
greek, 56
cyrillic, 57
armenian, 58
hebrew, 59
arabic, 60
syriac, 61
thaana, 62
devanagari, 63
bengali, 64
gurmukhi, 65
gujarati, 66
oriya, 67
tamil, 68
telugu, 69
kannada, 70
malayalam, 71
sinhala, 72
thai, 73
lao, 74
tibetan, 75
myanmar, 76
georgian, 77
hangul, 78
ethiopic, 79
cherokee, 80
canadianaboriginal, 81
ogham, 82
runic, 83
khmer, 84
mongolian, 85
hiragana, 86
katakana, 87
bopomofo, 88
han, 89
yi, 90
olditalic, 91
gothic, 92
deseret, 93
inherited, 94
tagalog, 95
hanunoo, 96
buhid, 97
tagbanwa, 98
limbu, 99
taile, 100
linearb, 101
ugaritic, 102
shavian, 103
osmanya, 104
cypriot, 105
braille, 106
buginese, 107
coptic, 108
newtailue, 109
glagolitic, 110
tifinagh, 111
sylotinagri, 112
oldpersian, 113
kharoshthi, 114
balinese, 115
cuneiform, 116
phoenician, 117
phagspa, 118
nko, 119
sundanese, 120
lepcha, 121
olchiki, 122
vai, 123
saurashtra, 124
kayahli, 125
rejang, 126
lycian, 127
carian, 128
lydian, 129
cham, 130
#endif /* USE_UNICODE_PROPERTIES */
%%
static int

Просмотреть файл

@ -176,6 +176,12 @@ def make_const(prop, pairs, name)
puts "}; /* CR_#{prop} */"
end
# Build the lookup form of a property name: lowercase it, then delete every
# hyphen, space and underscore (e.g. "Canadian_Aboriginal" becomes
# "canadianaboriginal").  Works on the copy returned by downcase and returns
# that copy.
def normalize_propname(name)
name = name.downcase
name.gsub!(/[- _]/, '')
# gsub! returns nil when nothing was replaced, so return the string explicitly.
name
end
puts '%{'
gcps, data = parse_unicode_data(ARGV[0])
POSIX_NAMES.each do |name|
@ -213,10 +219,10 @@ struct uniname2ctype_struct;
%%
__HEREDOC
i = -1
POSIX_NAMES.each {|name|puts"%-21s %3d"%[name+',', i+=1]}
POSIX_NAMES.each {|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
puts "#ifdef USE_UNICODE_PROPERTIES"
gcps.each{|name|puts"%-21s %3d"%[name+',', i+=1]}
scripts.each{|name|puts"%-21s %3d"%[name+',', i+=1]}
gcps.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
scripts.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
puts "#endif /* USE_UNICODE_PROPERTIES */\n"
puts(<<'__HEREDOC')
%%