implement String/Symbol#casecmp? including Unicode case folding

* string.c: Implement String#casecmp? and Symbol#casecmp? by using
  String#downcase :fold for Unicode case folding. This does not include
  options such as :turkic, because these currently cannot be combined
  with the :fold option. This implements feature #12786.

* test/ruby/test_string.rb, test/ruby/test_symbol.rb: Tests for above.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56912 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
duerst 2016-11-28 08:37:32 +00:00
Родитель bd2fd73196
Коммит ad619e02c4
3 изменённых файлов: 69 добавлений и 0 удалений

Просмотреть файл

@ -3194,6 +3194,40 @@ rb_str_casecmp(VALUE str1, VALUE str2)
return INT2FIX(-1);
}
/* Declared ahead of the doc comment below so that the comment stays directly
 * attached to the function it documents. */
static VALUE rb_str_downcase(int argc, VALUE *argv, VALUE str); /* forward declaration */

/*
 *  call-seq:
 *     str.casecmp?(other_str)   -> true, false, or nil
 *
 *  Returns true if str and other_str are equal after Unicode case folding,
 *  false if they are not equal, and nil if other_str is not a string or if
 *  the two strings have incompatible encodings.
 *
 *     "abcdef".casecmp?("abcde")     #=> false
 *     "aBcDeF".casecmp?("abcdef")    #=> true
 *     "abcdef".casecmp?("abcdefg")   #=> false
 *     "abcdef".casecmp?("ABCDEF")    #=> true
 *     "abcdef".casecmp?(1)           #=> nil
 */

static VALUE
rb_str_casecmp_p(VALUE str1, VALUE str2)
{
    VALUE fold_opt = sym_fold;
    VALUE other, folded_str1, folded_str2;

    /* Use the non-raising conversion so that a non-string argument produces
     * the documented nil result instead of a TypeError. */
    other = rb_check_string_type(str2);
    if (NIL_P(other)) {
        return Qnil;
    }

    /* Strings whose encodings cannot be combined are not comparable. */
    if (!rb_enc_compatible(str1, other)) {
        return Qnil;
    }

    /* Fold both sides via String#downcase with the :fold option, then
     * compare the folded strings for equality. */
    folded_str1 = rb_str_downcase(1, &fold_opt, str1);
    folded_str2 = rb_str_downcase(1, &fold_opt, other);

    return rb_str_eql(folded_str1, folded_str2);
}
#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
static long
@ -9615,6 +9649,24 @@ sym_casecmp(VALUE sym, VALUE other)
return rb_str_casecmp(rb_sym2str(sym), rb_sym2str(other));
}
/*
 *  call-seq:
 *     sym.casecmp?(other)   -> true, false, or nil
 *
 *  Case-insensitive equality test for symbols. Gives nil when other is not
 *  a symbol; otherwise the string forms of both symbols are handed to the
 *  String#casecmp? implementation, which answers true if they are equal
 *  after Unicode case folding and false if they are not.
 */

static VALUE
sym_casecmp_p(VALUE sym, VALUE other)
{
    VALUE result = Qnil;    /* answer used when other is not a symbol */

    if (SYMBOL_P(other)) {
        VALUE self_name = rb_sym2str(sym);
        VALUE other_name = rb_sym2str(other);

        result = rb_str_casecmp_p(self_name, other_name);
    }
    return result;
}
/*
* call-seq:
* sym =~ obj -> integer or nil
@ -9814,6 +9866,7 @@ Init_String(void)
rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
rb_define_method(rb_cString, "hash", rb_str_hash_m, 0);
rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
rb_define_method(rb_cString, "casecmp?", rb_str_casecmp_p, 1);
rb_define_method(rb_cString, "+", rb_str_plus, 1);
rb_define_method(rb_cString, "*", rb_str_times, 1);
rb_define_method(rb_cString, "%", rb_str_format_m, 1);
@ -9963,6 +10016,7 @@ Init_String(void)
rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);
rb_define_method(rb_cSymbol, "casecmp?", sym_casecmp_p, 1);
rb_define_method(rb_cSymbol, "=~", sym_match, 1);
rb_define_method(rb_cSymbol, "[]", sym_aref, -1);

Просмотреть файл

@ -2210,6 +2210,13 @@ CODE
assert_equal(1, "\u3042B".casecmp("\u3042a"))
end
# Exercises String#casecmp?: plain ASCII pairs, non-ASCII letters, a pair that
# is only equal under full Unicode case folding, and the nil result for
# strings whose encodings are incompatible.
def test_casecmp?
  # ASCII letters match regardless of case; different words do not.
  assert_equal(true, 'FoO'.casecmp?('fOO'))
  assert_equal(false, 'FoO'.casecmp?('BaR'))
  assert_equal(false, 'baR'.casecmp?('FoO'))

  # Non-ASCII letters are folded as well.
  assert_equal(true, 'äöü'.casecmp?('ÄÖÜ'))

  # Full case folding may change the length: sharp s folds to "ss", which a
  # simple one-to-one lowercase mapping would not equate with "SS".
  assert_equal(true, 'straße'.casecmp?('STRASSE'))

  # Two non-ASCII strings in incompatible encodings cannot be compared.
  assert_nil("\u3042".casecmp?("\u3042".encode("Shift_JIS")))
end
def test_upcase2
assert_equal("\u3042AB", "\u3042aB".upcase)
end

Просмотреть файл

@ -276,6 +276,14 @@ class TestSymbol < Test::Unit::TestCase
assert_nil(:foo.casecmp("foo"))
end
# Symbol#casecmp? answers true/false for symbol arguments (ignoring case,
# including for non-ASCII letters) and nil for a non-symbol argument.
def test_casecmp?
  # [expected, receiver, argument] for plain ASCII symbols.
  ascii_cases = [
    [true,  :FoO, :fOO],
    [false, :FoO, :BaR],
    [false, :baR, :FoO],
  ]
  ascii_cases.each do |expected, receiver, argument|
    assert_equal(expected, receiver.casecmp?(argument))
  end

  # A String argument is not a symbol, so the answer is nil.
  not_a_symbol = "foo"
  assert_nil(:foo.casecmp?(not_a_symbol))

  # Non-ASCII letters are matched case-insensitively too.
  lower = :äöü
  upper = :ÄÖÜ
  assert_equal(true, lower.casecmp?(upper))
end
def test_length
assert_equal(3, :FoO.length)
assert_equal(3, :FoO.size)