From c71019f8887f3f524f5ef3c0a7491ff14bef0b16 Mon Sep 17 00:00:00 2001 From: naruse Date: Mon, 21 Apr 2014 10:14:46 +0000 Subject: [PATCH] * ext/-test-/string/coderange.c: add Bug::String.new#coderange_scan to explicitly scan coderange. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@45649 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 +++ ext/-test-/string/coderange.c | 21 +++++++++- test/-ext-/string/test_coderange.rb | 59 +++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 test/-ext-/string/test_coderange.rb diff --git a/ChangeLog b/ChangeLog index cafd1e708d..82290f35ec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Mon Apr 21 18:55:21 2014 NARUSE, Yui + + * ext/-test-/string/coderange.c: add Bug::String.new#coderange_scan + to explicitly scan coderange. + Mon Apr 21 18:19:35 2014 NARUSE, Yui * string.c (coderange_scan): remove unused logic. diff --git a/ext/-test-/string/coderange.c b/ext/-test-/string/coderange.c index 2f1e6a06a8..b93172d72d 100644 --- a/ext/-test-/string/coderange.c +++ b/ext/-test-/string/coderange.c @@ -2,10 +2,11 @@ #include "ruby/encoding.h" static VALUE sym_7bit, sym_valid, sym_unknown, sym_broken; + static VALUE -str_coderange(VALUE str) +coderange_int2sym(int coderange) { - switch (ENC_CODERANGE(str)) { + switch (coderange) { case ENC_CODERANGE_7BIT: return sym_7bit; case ENC_CODERANGE_VALID: @@ -19,6 +20,21 @@ str_coderange(VALUE str) UNREACHABLE; } +/* return coderange without scan */ +static VALUE +str_coderange(VALUE str) +{ + return coderange_int2sym(ENC_CODERANGE(str)); +} + +/* scan coderange and return the result */ +static VALUE +str_coderange_scan(VALUE str) +{ + ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN); + return coderange_int2sym(rb_enc_str_coderange(str)); +} + void Init_coderange(VALUE klass) { @@ -27,4 +43,5 @@ Init_coderange(VALUE klass) sym_unknown = ID2SYM(rb_intern("unknown")); sym_broken = ID2SYM(rb_intern("broken")); rb_define_method(klass, "coderange", str_coderange, 0); + rb_define_method(klass, "coderange_scan", str_coderange_scan, 0); } diff --git a/test/-ext-/string/test_coderange.rb b/test/-ext-/string/test_coderange.rb new file mode 100644 index 0000000000..83cebf19c0 --- /dev/null +++ b/test/-ext-/string/test_coderange.rb @@ -0,0 +1,59 @@ +# coding: ascii-8bit +require 'test/unit' +require "-test-/string/string" +require "rbconfig/sizeof" + +class Test_StringCoderange < Test::Unit::TestCase + def setup + @sizeof_voidp = RbConfig::SIZEOF["void*"] + @a8 = Encoding::ASCII_8BIT + @a7 = Encoding::US_ASCII + @u8 = Encoding::UTF_8 + end + + def test_ascii8bit + enc = @a8 + str = "a" + str.force_encoding(enc) + assert_equal :"7bit", Bug::String.new(str).coderange_scan + + str = "a\xBE".force_encoding(enc) + assert_equal :valid, Bug::String.new(str).coderange_scan + end + + def test_usascii + enc = @a7 + str = "a" + str.force_encoding(enc) + assert_equal :"7bit", Bug::String.new(str).coderange_scan + + str = "a" * (@sizeof_voidp * 2) + str << "\xBE" + str.force_encoding(enc) + assert_equal :broken, Bug::String.new(str).coderange_scan + end + + def test_utf8 + enc = @u8 + str = "a" + str.force_encoding(enc) + assert_equal :"7bit", Bug::String.new(str).coderange_scan + + str = "a" * (@sizeof_voidp * 3) + str << "aa\xC2\x80" + str.force_encoding(enc) + assert_equal :valid, Bug::String.new(str).coderange_scan + + str = "a" * (@sizeof_voidp * 2) + str << "\xC2\x80" + str << "a" * (@sizeof_voidp * 2) + str.force_encoding(enc) + assert_equal :valid, Bug::String.new(str).coderange_scan + + str = "a" * (@sizeof_voidp * 2) + str << "\xC1\x80" + str << "a" * (@sizeof_voidp * 2) + str.force_encoding(enc) + assert_equal :broken, Bug::String.new(str).coderange_scan + end +end