зеркало из https://github.com/github/ruby.git
Work around transcoding issue with non-ASCII compatible encodings and xml escaping
When using a non-ASCII compatible source and destination encoding and xml escaping (the :xml option to String#encode), the resulting string was broken, as it used the correct non-ASCII compatible encoding, but contained data that was ASCII-compatible instead of compatible with the string's encoding. Work around this issue by detecting the case where both the source and destination encoding are non-ASCII compatible, and transcoding the source string from the non-ASCII compatible encoding to UTF-8. The xml escaping code will correctly handle the UTF-8 source string and then return the correctly encoded and escaped value. Fixes [Bug #12052] Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
This commit is contained in:
Родитель
391abc543c
Коммит
e86c1f6fc5
|
@ -126,6 +126,25 @@ class TestTranscode < Test::Unit::TestCase
|
|||
assert_equal("D\xFCrst".force_encoding('iso-8859-2'), "D\xFCrst".encode('iso-8859-2', 'iso-8859-1'))
|
||||
end
|
||||
|
||||
# Regression test for [Bug #12052]: XML escaping via the :xml option of
# String#encode must produce correctly encoded output for every combination
# of UTF-8/UTF-16/UTF-32 source and destination encodings, including pairs
# where neither side is ASCII-compatible.
#
# Each result is transcoded back to UTF-8 before comparison so one literal
# expected value can be used for every destination encoding.
def test_encode_xml_multibyte
  encodings = %w'UTF-8 UTF-16LE UTF-16BE UTF-32LE UTF-32BE'
  encodings.each do |src_enc|
    encodings.each do |dst_enc|
      # Source encoding taken from the string itself.
      escaped = "<>".encode(src_enc).encode(dst_enc, :xml=>:text)
      assert_equal("&lt;&gt;", escaped.encode('UTF-8'), "failed encoding #{src_enc} to #{dst_enc} with xml: :text")

      # :attr additionally escapes '"' and wraps the result in double quotes.
      escaped = '<">'.encode(src_enc).encode(dst_enc, :xml=>:attr)
      assert_equal('"&lt;&quot;&gt;"', escaped.encode('UTF-8'), "failed encoding #{src_enc} to #{dst_enc} with xml: :attr")

      # Source encoding passed explicitly: the string is deliberately
      # mislabelled as UTF-8 so that only the src_enc argument tells
      # String#encode how to interpret the bytes.
      escaped = "<>".encode(src_enc).force_encoding("UTF-8").encode(dst_enc, src_enc, :xml=>:text)
      assert_equal("&lt;&gt;", escaped.encode('UTF-8'), "failed encoding #{src_enc} to #{dst_enc} with xml: :text")

      escaped = '<">'.encode(src_enc).force_encoding("UTF-8").encode(dst_enc, src_enc, :xml=>:attr)
      assert_equal('"&lt;&quot;&gt;"', escaped.encode('UTF-8'), "failed encoding #{src_enc} to #{dst_enc} with xml: :attr")
    end
  end
end
|
||||
|
||||
def test_ascii_range
|
||||
encodings = [
|
||||
'US-ASCII', 'ASCII-8BIT',
|
||||
|
|
|
@ -2719,6 +2719,12 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
|
|||
}
|
||||
}
|
||||
else {
|
||||
if (senc && denc && !rb_enc_asciicompat(senc) && !rb_enc_asciicompat(denc)) {
|
||||
rb_encoding *utf8 = rb_utf8_encoding();
|
||||
str = rb_str_conv_enc(str, senc, utf8);
|
||||
senc = utf8;
|
||||
sname = "UTF-8";
|
||||
}
|
||||
if (encoding_equal(sname, dname)) {
|
||||
sname = "";
|
||||
dname = "";
|
||||
|
|
Загрузка…
Ссылка в новой задаче