* io.c (rb_io_set_encoding_by_bom): add IO#set_encoding_by_bom to set
  the encoding by BOM if one exists.  [Bug #15210]
This commit is contained in:
Nobuyoshi Nakada 2019-06-08 21:35:33 +09:00
Родитель bdc8b3789a
Коммит e717d6faa8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4BC7D6DF58D8DF60
3 изменённых файлов: 56 добавлений и 3 удалений

7
NEWS
Просмотреть файл

@ -86,6 +86,13 @@ GC::
Details on the algorithm and caveats can be found here:
https://bugs.ruby-lang.org/issues/15626
IO::
New method::
* Added IO#set_encoding_by_bom to check the BOM and set the external
encoding. [Bug #15210]
Integer::
Modified method::

44
io.c
Просмотреть файл

@ -6170,20 +6170,23 @@ io_strip_bom(VALUE io)
return 0;
}
/*
 * Sets the external encoding of +io+ from the index reported by
 * io_strip_bom().
 *
 * A non-zero index is resolved with rb_enc_from_index() and installed
 * through io_encoding_set(), keeping the stream's current internal
 * encoding.  A zero index clears fptr->encs.enc2 instead.
 *
 * Returns the resolved encoding, or NULL when the index was zero.
 *
 * NOTE(review): this span appears to carry both the old and the new
 * lines of a diff (two return-type lines, two io_encoding_set() calls);
 * confirm which ones are live before relying on it.
 */
static void
static rb_encoding *
io_set_encoding_by_bom(VALUE io)
{
/* presumably the index of the encoding whose BOM was found, 0 if none -- confirm against io_strip_bom() */
int idx = io_strip_bom(io);
rb_io_t *fptr;
/* stays NULL unless a non-zero index is resolved below */
rb_encoding *extenc = NULL;
GetOpenFile(io, fptr);
if (idx) {
io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)),
rb_io_internal_encoding(io), Qnil);
/* keep the resolved encoding so it can be handed back to the caller */
extenc = rb_enc_from_index(idx);
io_encoding_set(fptr, rb_enc_from_encoding(extenc),
rb_io_internal_encoding(io), Qnil);
}
else {
/* zero index: drop the second encoding slot */
fptr->encs.enc2 = NULL;
}
return extenc;
}
static VALUE
@ -8306,6 +8309,40 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io)
return io;
}
/*
 *  call-seq:
 *     ios.set_encoding_by_bom   -> encoding or nil
 *
 *  Checks if +ios+ starts with a BOM, and then consumes it and sets
 *  the external encoding.  Returns the result encoding if found, or
 *  nil.
 *
 *  Raises ArgumentError if +ios+ is not in binmode, if an encoding
 *  conversion is already set, or if its external encoding has already
 *  been set to something other than ASCII-8BIT.
 *
 *    File.write("bom.txt", "\u{FEFF}abc")
 *    ios = File.open("bom.txt", "rb")
 *    ios.set_encoding_by_bom    #=>  #<Encoding:UTF-8>
 *
 *    File.write("nobom.txt", "abc")
 *    ios = File.open("nobom.txt", "rb")
 *    ios.set_encoding_by_bom    #=>  nil
 */

static VALUE
rb_io_set_encoding_by_bom(VALUE io)
{
    rb_io_t *fptr;

    GetOpenFile(io, fptr);
    if (!(fptr->mode & FMODE_BINMODE)) {
        rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode");
    }
    if (fptr->encs.enc2) {
        rb_raise(rb_eArgError, "encoding conversion is set");
    }
    else if (fptr->encs.enc && fptr->encs.enc != rb_ascii8bit_encoding()) {
        /*
         * Honor the documented contract: an explicitly chosen external
         * encoding must not be silently replaced.  ASCII-8BIT is let
         * through because a plain "rb" open must keep working;
         * presumably that is what binmode leaves in encs.enc -- confirm.
         */
        rb_raise(rb_eArgError, "encoding is set to %s already",
                 rb_enc_name(fptr->encs.enc));
    }
    /* NULL means no BOM was recognized, so nothing was changed to report. */
    if (!io_set_encoding_by_bom(io)) return Qnil;
    return rb_enc_from_encoding(fptr->encs.enc);
}
/*
* call-seq:
* File.new(filename, mode="r" [, opt]) -> file
@ -13319,6 +13356,7 @@ Init_IO(void)
rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0);
rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0);
rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1);
rb_define_method(rb_cIO, "set_encoding_by_bom", rb_io_set_encoding_by_bom, 0);
rb_define_method(rb_cIO, "autoclose?", rb_io_autoclose_p, 0);
rb_define_method(rb_cIO, "autoclose=", rb_io_set_autoclose, 1);

Просмотреть файл

@ -2097,6 +2097,10 @@ EOT
assert_equal(Encoding::UTF_8, result.encoding, message)
assert_equal(stripped, result, message)
end
File.open(path, "rb") {|f|
assert_equal(Encoding.find(name), f.set_encoding_by_bom)
}
}
end
end
@ -2139,6 +2143,10 @@ EOT
assert_equal(stripped, result, bug8323)
result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
assert_equal(stripped, result, bug8323)
File.open(path, "rb") {|f|
assert_nil(f.set_encoding_by_bom)
}
}
end