* configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
  MINIRUBY since miniruby might not be able to load DLL.

* test/ruby/test_m17n.rb: move tests from bootstrap test.

* encoding.c (enc_find): should check name if ASCII compatible.

* string.c (rb_str_end_with): should check character boundary.

* encoding.c (rb_enc_compatible): encoding must be ASCII
  compatible before checking ENC_CODERANGE_7BIT.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15167 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2008-01-22 03:59:53 +00:00
Родитель 157664b9f3
Коммит 65a8185eb2
6 изменённых файлов: 137 добавлений и 221 удалений

Просмотреть файл

@ -1,3 +1,17 @@
Tue Jan 22 12:57:07 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
MINIRUBY since miniruby might not be able to load DLL.
* test/ruby/test_m17n.rb: move tests from bootstrap test.
* encoding.c (enc_find): should check name if ASCII compatible.
* string.c (rb_str_end_with): should check character boundary.
* encoding.c (rb_enc_compatible): encoding must be ASCII
compatible before checking ENC_CODERANGE_7BIT.
Tue Jan 22 09:26:47 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (rb_str_each_char): iterates over a shadow.

Просмотреть файл

@ -1,200 +0,0 @@
assert_normal_exit %q{
"abcdefghij\xf0".force_encoding("utf-8").reverse.inspect
}, '[ruby-dev:32448]'
assert_equal 'true', %q{
"abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp")) ==
"a\xa1\xa1c".force_encoding("euc-jp")
}
assert_equal 'ok', %q{
begin
if ("\xa1\xa2\xa1\xa3").force_encoding("euc-jp").split(//) ==
["\xa1\xa2".force_encoding("euc-jp"), "\xa1\xa3".force_encoding("euc-jp")]
:ok
else
:ng
end
rescue
:ng
end
}, '[ruby-dev:32452]'
assert_equal 'ok', %q{
begin
"\xa1\xa1".force_encoding("euc-jp") + "\xa1".force_encoding("ascii-8bit")
:ng
rescue ArgumentError
:ok
end
}
assert_equal 'ok', %q{
begin
"%s%s" % ["\xc2\xa1".force_encoding("sjis"), "\xc2\xa1".force_encoding("euc-jp")]
rescue ArgumentError
:ok
end
}
assert_equal '0', %q{
"\xa1\xa2".force_encoding("euc-jp").count("z")
}
assert_equal '1', %q{
"\xa1\xa2".force_encoding("euc-jp").delete("z").length
}
assert_equal 'false', %q{
"\xa1\xa2\xa3\xa4".force_encoding("euc-jp").include?("\xa3".force_encoding("euc-jp"))
}
assert_equal 'ok', %q{
"\xa1\xa2\xa3\xa4".force_encoding("euc-jp").index("\xa3".force_encoding("euc-jp")) or :ok
}
assert_equal 'ok', %q{
"\xa1\xa2\xa3\xa4".force_encoding("euc-jp").rindex("\xa3".force_encoding("euc-jp")) or :ok
}
assert_equal 'false', %q{
s1 = "\xa1\xa1".force_encoding("euc-jp")
s2 = s1.dup
(94*94+94).times { s2.next! }
s1 == s2
}
assert_equal 'ok', %q{
"\xa1\xa2a\xa3\xa4".force_encoding("euc-jp").scan(/a/)
:ok
}
assert_equal 'ok', %q{
"\xa1\xa2a\xa3\xa4".force_encoding("euc-jp").split(/a/)
:ok
}
assert_equal 'ok', %q{
s1 = "\xa1\xa2".force_encoding("euc-jp")
s2 = "\xa1\xa2".force_encoding("sjis")
begin
s1.upto(s2) {|x| break }
:ng
rescue ArgumentError
:ok
end
}
assert_equal 'true', %q{
"%s" % "\xa1\xa1".force_encoding("euc-jp") ==
"\xa1\xa1".force_encoding("euc-jp")
}
assert_equal 'a', %q{
s = "a".dup.force_encoding("EUC-JP")
s.length
s[0,2]
}
assert_equal 'ok', %q{
s1 = "\x81\x41".force_encoding("sjis")
s2 = "\x81\x61".force_encoding("sjis")
s1.casecmp(s2) == 0 ? :ng : :ok
}
assert_equal 'EUC-JP', %q{ ("\xc2\xa1 %s".force_encoding("EUC-JP") % "foo").encoding.name }
assert_equal 'true', %q{ "\xa1\xa2\xa3\xa4".force_encoding("euc-jp")["\xa2\xa3".force_encoding("euc-jp")] == nil }
assert_equal 'ok', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
begin
s["\xb0\xa3"] = "foo"
:ng
rescue ArgumentError
:ok
end
}
assert_equal 'EUC-JP', %q{ "\xa3\xb0".force_encoding("EUC-JP").center(10).encoding.name }
assert_equal 'ok', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
begin
s.chomp("\xa3\xb4".force_encoding("shift_jis"))
:ng
rescue ArgumentError
:ok
end
}
assert_equal 'ok', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
begin
s.count("\xa3\xb0".force_encoding("ascii-8bit"))
:ng
rescue ArgumentError
:ok
end
}
assert_equal 'ok', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
begin
s.delete("\xa3\xb2".force_encoding("ascii-8bit"))
:ng
rescue ArgumentError
:ok
end
}
assert_equal 'ok', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
begin
s.each_line("\xa3\xb1".force_encoding("ascii-8bit")) {|l| }
:ng
rescue ArgumentError
:ok
end
}
assert_equal 'true', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
s.gsub(/\xa3\xb1/e, "z") == "\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
}
assert_equal 'false', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
s.include?("\xb0\xa3".force_encoding("euc-jp"))
}
assert_equal 'ok', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
s.index("\xb3\xa3".force_encoding("euc-jp")) or :ok
}
assert_equal 'ok', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
s.insert(-1, "a")
:ok
}
assert_finish 1, %q{ "\xa3\xfe".force_encoding("euc-jp").next }
assert_equal 'ok', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
begin
s.rindex("\xb1\xa3".force_encoding("ascii-8bit"))
:ng
rescue ArgumentError
:ok
end
}
assert_equal 'true', %q{
s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
s.split("\xa3\xb1".force_encoding("euc-jp")) == [
"\xa3\xb0".force_encoding("euc-jp"),
"\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
]
}, '[ruby-dev:32452]'
assert_normal_exit %q{ // =~ :a }

Просмотреть файл

@ -1370,7 +1370,7 @@ if test x"$cross_compiling" = xyes; then
PREP=fake.rb
RUNRUBY='$(MINIRUBY) -I`cd $(srcdir)/lib; pwd`'
else
MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib -I$(EXTOUT)/$(arch)'
MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib'
PREP='miniruby$(EXEEXT)'
RUNRUBY='$(MINIRUBY) $(srcdir)/runruby.rb --extout=$(EXTOUT)'
fi

Просмотреть файл

@ -640,7 +640,7 @@ rb_encoding*
rb_enc_compatible(VALUE str1, VALUE str2)
{
int idx1, idx2;
rb_encoding *enc;
rb_encoding *enc1, *enc2;
idx1 = rb_enc_get_index(str1);
idx2 = rb_enc_get_index(str2);
@ -648,6 +648,10 @@ rb_enc_compatible(VALUE str1, VALUE str2)
if (idx1 == idx2) {
return rb_enc_from_index(idx1);
}
enc1 = rb_enc_from_index(idx1);
enc2 = rb_enc_from_index(idx2);
if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2))
return 0;
if (BUILTIN_TYPE(str1) != T_STRING) {
VALUE tmp = str1;
int idx0 = idx1;
@ -664,17 +668,15 @@ rb_enc_compatible(VALUE str1, VALUE str2)
cr2 = rb_enc_str_coderange(str2);
if (cr1 != cr2) {
/* may need to handle ENC_CODERANGE_BROKEN */
if (cr1 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx2);
if (cr2 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx1);
if (cr1 == ENC_CODERANGE_7BIT) return enc2;
}
if (cr2 == ENC_CODERANGE_7BIT) {
if (idx1 == 0) return rb_enc_from_index(idx2);
return rb_enc_from_index(idx1);
if (idx1 == 0) return enc2;
return enc1;
}
}
if (cr1 == ENC_CODERANGE_7BIT &&
rb_enc_asciicompat(enc = rb_enc_from_index(idx2)))
return enc;
if (cr1 == ENC_CODERANGE_7BIT)
return enc2;
}
return 0;
}
@ -908,7 +910,11 @@ static VALUE
enc_find(VALUE klass, VALUE enc)
{
int idx;
if (SYMBOL_P(enc)) enc = rb_id2str(SYM2ID(enc));
if (!rb_enc_asciicompat(rb_enc_get(enc))) {
rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
}
idx = rb_enc_find_index(StringValueCStr(enc));
if (idx < 0) {
rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));

Просмотреть файл

@ -5522,14 +5522,19 @@ static VALUE
rb_str_end_with(int argc, VALUE *argv, VALUE str)
{
int i;
char *p, *s;
rb_encoding *enc;
for (i=0; i<argc; i++) {
VALUE tmp = rb_check_string_type(argv[i]);
if (NIL_P(tmp)) continue;
rb_enc_check(str, tmp);
enc = rb_enc_check(str, tmp);
if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
if (memcmp(RSTRING_PTR(str) + RSTRING_LEN(str) - RSTRING_LEN(tmp),
RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
p = RSTRING_PTR(str);
s = p + RSTRING_LEN(str) - RSTRING_LEN(tmp);
if (rb_enc_left_char_head(p, s, enc) != s)
continue;
if (memcmp(s, p, RSTRING_LEN(tmp)) == 0)
return Qtrue;
}
return Qfalse;

Просмотреть файл

@ -690,6 +690,9 @@ class TestM17N < Test::Unit::TestCase
#assert_raise(ArgumentError) { s("%c") % 0xc2a1 }
assert_strenc("\u{c2a1}", 'UTF-8', u("%c") % 0xc2a1)
assert_strenc("\u{c2}", 'UTF-8', u("%c") % 0xc2)
assert_raise(ArgumentError) {
"%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")]
}
end
def test_sprintf_p
@ -744,6 +747,7 @@ class TestM17N < Test::Unit::TestCase
assert_strenc("\x00", 'EUC-JP', e("%s") % e("\x00"))
assert_strenc("\x00", 'Windows-31J', s("%s") % s("\x00"))
assert_strenc("\x00", 'UTF-8', u("%s") % u("\x00"))
assert_equal("EUC-JP", (e("\xc2\xa1 %s") % "foo").encoding.name)
end
def test_str_lt
@ -791,29 +795,34 @@ class TestM17N < Test::Unit::TestCase
assert_equal(nil, u("\xc2\xa1\xc2\xa2\xc2\xa3")[u("\xa1\xc2")])
assert_raise(ArgumentError) { u("\xc2\xa1\xc2\xa2\xc2\xa3")[a("\xa1\xc2")] }
assert_nil(e("\xa1\xa2\xa3\xa4")[e("\xa2\xa3")])
end
def test_aset
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_raise(ArgumentError){s["\xb0\xa3"] = "foo"}
end
def test_str_center
assert_encoding("EUC-JP", "a".center(5, "\xa1\xa2".force_encoding("euc-jp")).encoding)
assert_encoding("EUC-JP", "a".center(5, e("\xa1\xa2")).encoding)
assert_encoding("EUC-JP", e("\xa3\xb0").center(10).encoding)
end
def test_squeeze
s = "\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
assert_equal("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4".force_encoding("euc-jp"), s.squeeze)
s = e("\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4")
assert_equal(e("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4"), s.squeeze)
end
def test_tr
s = "\x81\x41".force_encoding("shift_jis")
s = s("\x81\x41")
assert_equal(s.tr("A", "B"), s)
assert_equal(s.tr_s("A", "B"), s)
assert_nothing_raised {
"a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "a".force_encoding("EUC-JP"))
"a".force_encoding("ASCII-8BIT").tr(a("a"), a("a"))
}
assert_equal("\xA1\xA1".force_encoding("EUC-JP"),
"a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP")))
assert_equal(e("\xA1\xA1"), a("a").tr(a("a"), e("\xA1\xA1")))
end
def test_tr_s
@ -821,6 +830,40 @@ class TestM17N < Test::Unit::TestCase
"a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP")))
end
def test_count
assert_equal(0, e("\xa1\xa2").count("z"))
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_raise(ArgumentError){s.count(a("\xa3\xb0"))}
end
def test_delete
assert_equal(1, e("\xa1\xa2").delete("z").length)
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_raise(ArgumentError){s.delete(a("\xa3\xb2"))}
end
def test_include?
assert_equal(false, e("\xa1\xa2\xa3\xa4").include?(e("\xa3")))
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_equal(false, s.include?(e("\xb0\xa3")))
end
def test_index
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_nil(s.index(e("\xb3\xa3")))
assert_nil(e("\xa1\xa2\xa3\xa4").index(e("\xa3")))
assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e("\xa3")))
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_raise(ArgumentError){s.rindex(a("\xb1\xa3"))}
end
def test_next
s1 = e("\xa1\xa1")
s2 = s1.dup
(94*94+94).times { s2.next! }
assert_not_equal(s1, s2)
end
def test_sub
s = "abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp"))
assert_encoding("EUC-JP", s.encoding)
@ -828,6 +871,40 @@ class TestM17N < Test::Unit::TestCase
assert_equal(Encoding::EUC_JP, "\xa4\xa2".force_encoding("euc-jp").gsub(/./, '\&').encoding)
end
def test_insert
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_equal(e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4a"), s.insert(-1, "a"))
end
def test_scan
assert_equal(["a"], e("\xa1\xa2a\xa3\xa4").scan(/a/))
end
def test_upto
s1 = e("\xa1\xa2")
s2 = s("\xa1\xa2")
assert_raise(ArgumentError){s1.upto(s2) {|x| break }}
end
def test_casecmp
s1 = s("\x81\x41")
s2 = s("\x81\x61")
assert_not_equal(0, s1.casecmp(s2))
end
def test_reverse
assert_equal(u("\xf0jihgfedcba"), u("abcdefghij\xf0").reverse)
end
def test_plus
assert_raise(ArgumentError){u("\xe3\x81\x82") + a("\xa1")}
end
def test_chomp
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_raise(ArgumentError){s.chomp(s("\xa3\xb4"))}
end
def test_gsub
s = 'abc'
s.ascii_only?
@ -844,14 +921,21 @@ class TestM17N < Test::Unit::TestCase
"\xc2\xa1".force_encoding("utf-8")
}
}
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_equal(e("\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4"), s.gsub(/\xa3\xb1/e, "z"))
end
def test_end_with
s1 = "\x81\x40".force_encoding("sjis")
s1 = s("\x81\x40")
s2 = "@"
assert_equal(false, s1.end_with?(s2), "#{encdump s1}.end_with?(#{encdump s2})")
end
def test_each_line
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
assert_raise(ArgumentError){s.each_line(a("\xa3\xb1")) {|l| }}
end
def test_each_char
a = [e("\xa4\xa2"), "b", e("\xa4\xa4"), "c"]
s = "\xa4\xa2b\xa4\xa4c".force_encoding("euc-jp")
@ -860,6 +944,13 @@ class TestM17N < Test::Unit::TestCase
def test_regexp_match
assert_equal([0,0], //.match("\xa1\xa1".force_encoding("euc-jp"),-1).offset(0))
assert_equal(0, // =~ :a)
end
def test_split
assert_equal(e("\xa1\xa2\xa1\xa3").split(//),
[e("\xa1\xa2"), e("\xa1\xa3")],
'[ruby-dev:32452]')
end
def test_nonascii_method_name