From dbf7e6f9f232a3173b118c5f1ad57f97aa7e4e9d Mon Sep 17 00:00:00 2001 From: naruse Date: Tue, 9 Nov 2010 07:36:33 +0000 Subject: [PATCH] * regenc.c (onigenc_minimum_property_name_to_ctype): \p{...} should be case insensitive. [ruby-core:33000] * regenc.c (onigenc_property_list_add_property): ditto. * enc/euc_jp.c (init_property_list, property_name_to_ctype): to lowercase property names. * enc/shift_jis.c (init_property_list, property_name_to_ctype): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29732 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 14 ++++++++++++++ enc/euc_jp.c | 14 ++++++++++---- enc/shift_jis.c | 14 ++++++++++---- regenc.c | 2 +- test/ruby/test_m17n.rb | 8 ++++++++ 5 files changed, 43 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 321b343787..bcd6d26b16 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +Tue Nov 9 13:24:33 2010 NARUSE, Yui + + * regenc.c (onigenc_minimum_property_name_to_ctype): + \p{...} should be case insensitive. [ruby-core:33000] + + * regenc.c (onigenc_property_list_add_property): + ditto. + + * enc/euc_jp.c (init_property_list, property_name_to_ctype): + to lowercase property names. + + * enc/shift_jis.c (init_property_list, property_name_to_ctype): + ditto. + Tue Nov 9 13:29:36 2010 NAKAMURA Usaku * win32/win32.c (overlapped_socket_io): get rid of a warning of 64bit diff --git a/enc/euc_jp.c b/enc/euc_jp.c index d4a9f57866..3b339f9600 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -274,8 +274,8 @@ init_property_list(void) { int r; - PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); - PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); + PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana); + PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana); PropertyInited = 1; end: @@ -286,11 +286,17 @@ static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { st_data_t ctype; + UChar *s, *e; PROPERTY_LIST_INIT_CHECK; - if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { - return onigenc_minimum_property_name_to_ctype(enc, p, end); + s = e = ALLOC_N(UChar, end-p+1); + for (; p < end; p++) { + *e++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + + if (onig_st_lookup_strend(PropertyNameTable, s, e, &ctype) == 0) { + return onigenc_minimum_property_name_to_ctype(enc, s, e); } return ctype; diff --git a/enc/shift_jis.c b/enc/shift_jis.c index f7614205af..cac4cf7751 100644 --- a/enc/shift_jis.c +++ b/enc/shift_jis.c @@ -283,8 +283,8 @@ init_property_list(void) { int r; - PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); - PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); + PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana); + PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana); PropertyInited = 1; end: @@ -295,11 +295,17 @@ static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { hash_data_type ctype; + UChar *s, *e; PROPERTY_LIST_INIT_CHECK; - if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { - return onigenc_minimum_property_name_to_ctype(enc, p, end); + s = e = ALLOC_N(UChar, end-p+1); + for (; p < end; p++) { + *e++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + + if (onig_st_lookup_strend(PropertyNameTable, s, e, &ctype) == 0) { + return onigenc_minimum_property_name_to_ctype(enc, s, e); } return (int)ctype; diff --git a/regenc.c b/regenc.c index b9b03b03bf..04f6a4627e 100644 --- a/regenc.c +++ b/regenc.c @@ -811,7 +811,7 @@ onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) len = onigenc_strlen(enc, p, end); for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) { if (len == pb->len && - onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) + STRNCASECMP(p, pb->name, len) == 0) return pb->ctype; } diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 53abfccb82..863f047ad8 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -538,6 +538,14 @@ class TestM17N < Test::Unit::TestCase assert(r.fixed_encoding?) assert_match(r, "\xa4\xa2".force_encoding("euc-jp")) + r = /\p{AsciI}/e + assert(r.fixed_encoding?) + assert_match(r, "a".force_encoding("euc-jp")) + + r = /\p{hiraganA}/e + assert(r.fixed_encoding?) + assert_match(r, "\xa4\xa2".force_encoding("euc-jp")) + r = eval('/\u{3042}\p{Hiragana}/'.force_encoding("euc-jp")) assert(r.fixed_encoding?) assert_equal(Encoding::UTF_8, r.encoding)