* io.c (io_encname_bom_p): check BOM prefix only, not including
  UTF prefix.
* io.c (parse_mode_enc): warn BOM with non-UTF encoding.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53084 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2015-12-13 09:48:27 +00:00
Родитель ce6f0e36a3
Коммит b861d5473c
3 изменённых файлов: 42 добавлений и 26 удалений

Просмотреть файл

@@ -1,4 +1,9 @@
Sun Dec 13 18:45:12 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
Sun Dec 13 18:46:31 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
* io.c (io_encname_bom_p): check BOM prefix only, not including
UTF prefix.
* io.c (parse_mode_enc): warn BOM with non-UTF encoding.
* io.c (parse_mode_enc): fix buffer overflow.

47
io.c
Просмотреть файл

@@ -4852,11 +4852,14 @@ rb_io_fmode_modestr(int fmode)
}
}
static const char bom_prefix[] = "bom|";
static const char utf_prefix[] = "utf-";
enum {bom_prefix_len = (int)sizeof(bom_prefix) - 1};
enum {utf_prefix_len = (int)sizeof(utf_prefix) - 1};
static int
io_encname_bom_p(const char *name, long len)
{
static const char bom_prefix[] = "bom|utf-";
enum {bom_prefix_len = (int)sizeof(bom_prefix) - 1};
return len > bom_prefix_len && STRNCASECMP(name, bom_prefix, bom_prefix_len) == 0;
}
@@ -5064,37 +5067,31 @@ parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p, int
int idx, idx2;
int fmode = fmode_p ? *fmode_p : 0;
rb_encoding *ext_enc, *int_enc;
long len;
/* parse estr as "enc" or "enc2:enc" or "enc:-" */
p = strrchr(estr, ':');
if (p) {
long len = (p++) - estr;
if (len == 0 || len > ENCODING_MAXNAMELEN)
idx = -1;
len = p ? (p++ - estr) : (long)strlen(estr);
if ((fmode & FMODE_SETENC_BY_BOM) || io_encname_bom_p(estr, len)) {
estr += bom_prefix_len;
len -= bom_prefix_len;
if (!STRNCASECMP(estr, utf_prefix, utf_prefix_len)) {
fmode |= FMODE_SETENC_BY_BOM;
}
else {
if (io_encname_bom_p(estr, len)) {
fmode |= FMODE_SETENC_BY_BOM;
estr += 4;
len -= 4;
}
rb_warn("BOM with non-UTF encoding %s is nonsense", estr);
fmode &= ~FMODE_SETENC_BY_BOM;
}
}
if (len == 0 || len > ENCODING_MAXNAMELEN) {
idx = -1;
}
else {
if (p) {
memcpy(encname, estr, len);
encname[len] = '\0';
estr = encname;
idx = rb_enc_find_index(encname);
}
}
else {
long len = strlen(estr);
if (io_encname_bom_p(estr, len)) {
fmode |= FMODE_SETENC_BY_BOM;
estr += 4;
len -= 4;
if (len > 0 && len <= ENCODING_MAXNAMELEN) {
memcpy(encname, estr, len);
encname[len] = '\0';
estr = encname;
}
}
idx = rb_enc_find_index(estr);
}

Просмотреть файл

@@ -2095,6 +2095,20 @@ EOT
end;
end
# Checks that opening a stream with the "bom|" mode prefix in front of a
# non-UTF encoding name (US-ASCII) emits a warning matching /BOM/, and that
# the stream still reports US-ASCII as its external encoding.
def test_bom_non_utf
enc = nil
# Read mode, using this test file itself as the input.
assert_warn(/BOM/) {
open(__FILE__, "r:bom|us-ascii") {|f| enc = f.external_encoding}
}
assert_equal(Encoding::US_ASCII, enc)
# Write mode against IO::NULL; the same warning and encoding are expected.
# NOTE(review): enc is not reset to nil before this case, so the
# assert_equal below would also pass if the block never reassigned it.
assert_warn(/BOM/) {
open(IO::NULL, "w:bom|us-ascii") {|f| enc = f.external_encoding}
}
assert_equal(Encoding::US_ASCII, enc)
end
def test_cbuf
with_tmpdir {
fn = "tst"