зеркало из https://github.com/github/ruby.git
* include/ruby/encoding.h (rb_str_transcode): add ecflags argument.
* transcode.c (econv_opts): extracted from str_transcode.
  (str_transcode_enc_args): extracted from str_transcode.
  (str_transcode0): extracted from str_transcode.
  (str_transcode): use econv_opts, str_transcode_enc_args, str_transcode0.
  (rb_str_transcode): call str_transcode0.
  (econv_primitive_insert_output): give the additional argument for rb_str_transcode.
* io.c (make_writeconv): use invalid/undef flags.
  (io_fwrite): ditto.
  (rb_scan_open_args): give the additional argument for rb_str_transcode.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18808 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
51ad3df2ce
Коммит
c6d291b003
18
ChangeLog
18
ChangeLog
|
@ -1,3 +1,21 @@
|
|||
Sun Aug 24 15:21:28 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* include/ruby/encoding.h (rb_str_transcode): add ecflags argument.
|
||||
|
||||
* transcode.c (econv_opts): extracted from str_transcode.
|
||||
(str_transcode_enc_args): extracted from str_transcode.
|
||||
(str_transcode0): extracted from str_transcode.
|
||||
(str_transcode): use econv_opts, str_transcode_enc_args,
|
||||
str_transcode0.
|
||||
(rb_str_transcode): call str_transcode0.
|
||||
(econv_primitive_insert_output): give the additional argument for
|
||||
rb_str_transcode.
|
||||
|
||||
* io.c (make_writeconv): use invalid/undef flags.
|
||||
(io_fwrite): ditto.
|
||||
(rb_scan_open_args): give the additional argument for
|
||||
rb_str_transcode.
|
||||
|
||||
Sun Aug 24 13:27:42 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (str_transcode): check last hash only if 0 < argc.
|
||||
|
|
|
@ -194,7 +194,7 @@ rb_enc_dummy_p(rb_encoding *enc)
|
|||
return ENC_DUMMY_P(enc) != 0;
|
||||
}
|
||||
|
||||
VALUE rb_str_transcode(VALUE str, VALUE to);
|
||||
VALUE rb_str_transcode(VALUE str, VALUE to, int ecflags);
|
||||
|
||||
/* econv stuff */
|
||||
|
||||
|
|
33
io.c
33
io.c
|
@ -699,6 +699,12 @@ make_writeconv(rb_io_t *fptr)
|
|||
fptr->writeconv_initialized = 1;
|
||||
|
||||
ecflags = 0;
|
||||
|
||||
if (fptr->mode & FMODE_INVALID_MASK)
|
||||
ecflags |= (fptr->mode / (FMODE_INVALID_MASK/ECONV_INVALID_MASK)) & ECONV_INVALID_MASK;
|
||||
if (fptr->mode & FMODE_UNDEF_MASK)
|
||||
ecflags |= (fptr->mode / (FMODE_UNDEF_MASK/ECONV_UNDEF_MASK)) & ECONV_UNDEF_MASK;
|
||||
|
||||
#ifdef TEXTMODE_NEWLINE_ENCODER
|
||||
if (NEED_NEWLINE_ENCODER(fptr))
|
||||
ecflags |= TEXTMODE_NEWLINE_ENCODER;
|
||||
|
@ -740,18 +746,31 @@ io_fwrite(VALUE str, rb_io_t *fptr)
|
|||
long len, n, r, l, offset = 0;
|
||||
|
||||
if (NEED_WRITECONV(fptr)) {
|
||||
VALUE common_encoding = Qnil;
|
||||
make_writeconv(fptr);
|
||||
|
||||
if (fptr->writeconv) {
|
||||
if (!NIL_P(fptr->writeconv_stateless)) {
|
||||
str = rb_str_transcode(str, fptr->writeconv_stateless);
|
||||
}
|
||||
str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT);
|
||||
if (!NIL_P(fptr->writeconv_stateless))
|
||||
common_encoding = fptr->writeconv_stateless;
|
||||
}
|
||||
else {
|
||||
if (fptr->enc2)
|
||||
str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc2));
|
||||
common_encoding = rb_enc_from_encoding(fptr->enc2);
|
||||
else
|
||||
str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc));
|
||||
common_encoding = rb_enc_from_encoding(fptr->enc);
|
||||
}
|
||||
|
||||
if (!NIL_P(common_encoding)) {
|
||||
int ecflags = 0;
|
||||
if (fptr->mode & FMODE_INVALID_MASK)
|
||||
ecflags |= (fptr->mode / (FMODE_INVALID_MASK/ECONV_INVALID_MASK)) & ECONV_INVALID_MASK;
|
||||
if (fptr->mode & FMODE_UNDEF_MASK)
|
||||
ecflags |= (fptr->mode / (FMODE_UNDEF_MASK/ECONV_UNDEF_MASK)) & ECONV_UNDEF_MASK;
|
||||
str = rb_str_transcode(str, common_encoding, ecflags);
|
||||
}
|
||||
|
||||
if (fptr->writeconv) {
|
||||
str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4622,7 +4641,7 @@ rb_scan_open_args(int argc, VALUE *argv,
|
|||
static VALUE fs_enc;
|
||||
if (!fs_enc)
|
||||
fs_enc = rb_enc_from_encoding(fs_encoding);
|
||||
fname = rb_str_transcode(fname, fs_enc);
|
||||
fname = rb_str_transcode(fname, fs_enc, 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1202,5 +1202,68 @@ EOT
|
|||
}
|
||||
}
|
||||
end
|
||||
|
||||
def test_invalid_w
|
||||
with_tmpdir {
|
||||
invalid_utf8 = "a\x80b".force_encoding("utf-8")
|
||||
open("t.txt", "w:euc-jp", :invalid => :replace) {|f|
|
||||
assert_nothing_raised { f.write invalid_utf8 }
|
||||
}
|
||||
assert_equal("a?b", File.read("t.txt"))
|
||||
|
||||
open("t.txt", "w:euc-jp", :invalid => :ignore) {|f|
|
||||
assert_nothing_raised { f.write invalid_utf8 }
|
||||
}
|
||||
assert_equal("ab", File.read("t.txt"))
|
||||
|
||||
open("t.txt", "w:euc-jp", :undef => :replace) {|f|
|
||||
assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 }
|
||||
}
|
||||
open("t.txt", "w:euc-jp", :undef => :ignore) {|f|
|
||||
assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 }
|
||||
}
|
||||
}
|
||||
end
|
||||
|
||||
def test_undef_w_stateless
|
||||
with_tmpdir {
|
||||
generate_file("t.txt", "a\uFFFDb")
|
||||
open("t.txt", "w:euc-jp:utf-8", :undef => :replace) {|f|
|
||||
assert_nothing_raised { f.write "a\uFFFDb" }
|
||||
}
|
||||
assert_equal("a?b", File.read("t.txt"))
|
||||
open("t.txt", "w:euc-jp:utf-8", :undef => :ignore) {|f|
|
||||
assert_nothing_raised { f.write "a\uFFFDb" }
|
||||
}
|
||||
assert_equal("ab", File.read("t.txt"))
|
||||
open("t.txt", "w:euc-jp:utf-8", :invalid => :replace) {|f|
|
||||
assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
|
||||
}
|
||||
open("t.txt", "w:euc-jp:utf-8", :invalid => :ignore) {|f|
|
||||
assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
|
||||
}
|
||||
}
|
||||
end
|
||||
|
||||
def test_undef_w_stateful
|
||||
with_tmpdir {
|
||||
generate_file("t.txt", "a\uFFFDb")
|
||||
open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace) {|f|
|
||||
assert_nothing_raised { f.write "a\uFFFDb" }
|
||||
}
|
||||
assert_equal("a?b", File.read("t.txt"))
|
||||
open("t.txt", "w:iso-2022-jp:utf-8", :undef => :ignore) {|f|
|
||||
assert_nothing_raised { f.write "a\uFFFDb" }
|
||||
}
|
||||
assert_equal("ab", File.read("t.txt"))
|
||||
open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace) {|f|
|
||||
assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
|
||||
}
|
||||
open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :ignore) {|f|
|
||||
assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
|
||||
}
|
||||
}
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
|
141
transcode.c
141
transcode.c
|
@ -1673,58 +1673,49 @@ str_transcoding_resize(VALUE destination, int len, int new_len)
|
|||
}
|
||||
|
||||
static int
|
||||
str_transcode(int argc, VALUE *argv, VALUE *self)
|
||||
econv_opts(VALUE opt)
|
||||
{
|
||||
VALUE v;
|
||||
int options = 0;
|
||||
v = rb_hash_aref(opt, sym_invalid);
|
||||
if (NIL_P(v)) {
|
||||
}
|
||||
else if (v==sym_ignore) {
|
||||
options |= ECONV_INVALID_IGNORE;
|
||||
}
|
||||
else if (v==sym_replace) {
|
||||
options |= ECONV_INVALID_REPLACE;
|
||||
v = rb_hash_aref(opt, sym_replace);
|
||||
}
|
||||
else {
|
||||
rb_raise(rb_eArgError, "unknown value for invalid character option");
|
||||
}
|
||||
v = rb_hash_aref(opt, sym_undef);
|
||||
if (NIL_P(v)) {
|
||||
}
|
||||
else if (v==sym_ignore) {
|
||||
options |= ECONV_UNDEF_IGNORE;
|
||||
}
|
||||
else if (v==sym_replace) {
|
||||
options |= ECONV_UNDEF_REPLACE;
|
||||
}
|
||||
else {
|
||||
rb_raise(rb_eArgError, "unknown value for undefined character option");
|
||||
}
|
||||
return options;
|
||||
}
|
||||
|
||||
static int
|
||||
str_transcode_enc_args(VALUE str, VALUE arg1, VALUE arg2,
|
||||
const char **sname, rb_encoding **senc,
|
||||
const char **dname, rb_encoding **denc)
|
||||
{
|
||||
VALUE dest;
|
||||
VALUE str = *self;
|
||||
long blen, slen;
|
||||
unsigned char *buf, *bp, *sp;
|
||||
const unsigned char *fromp;
|
||||
rb_encoding *from_enc, *to_enc;
|
||||
const char *from_e, *to_e;
|
||||
int from_encidx, to_encidx;
|
||||
VALUE from_encval, to_encval;
|
||||
VALUE opt;
|
||||
int options = 0;
|
||||
|
||||
if (0 < argc)
|
||||
opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
|
||||
else
|
||||
opt = Qnil;
|
||||
if (!NIL_P(opt)) {
|
||||
VALUE v;
|
||||
|
||||
argc--;
|
||||
v = rb_hash_aref(opt, sym_invalid);
|
||||
if (NIL_P(v)) {
|
||||
}
|
||||
else if (v==sym_ignore) {
|
||||
options |= ECONV_INVALID_IGNORE;
|
||||
}
|
||||
else if (v==sym_replace) {
|
||||
options |= ECONV_INVALID_REPLACE;
|
||||
v = rb_hash_aref(opt, sym_replace);
|
||||
}
|
||||
else {
|
||||
rb_raise(rb_eArgError, "unknown value for invalid character option");
|
||||
}
|
||||
v = rb_hash_aref(opt, sym_undef);
|
||||
if (NIL_P(v)) {
|
||||
}
|
||||
else if (v==sym_ignore) {
|
||||
options |= ECONV_UNDEF_IGNORE;
|
||||
}
|
||||
else if (v==sym_replace) {
|
||||
options |= ECONV_UNDEF_REPLACE;
|
||||
}
|
||||
else {
|
||||
rb_raise(rb_eArgError, "unknown value for undefined character option");
|
||||
}
|
||||
}
|
||||
if (argc < 1 || argc > 2) {
|
||||
rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
|
||||
}
|
||||
if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
|
||||
if ((to_encidx = rb_to_encoding_index(to_encval = arg1)) < 0) {
|
||||
to_enc = 0;
|
||||
to_encidx = 0;
|
||||
to_e = StringValueCStr(to_encval);
|
||||
|
@ -1733,12 +1724,12 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
to_enc = rb_enc_from_index(to_encidx);
|
||||
to_e = rb_enc_name(to_enc);
|
||||
}
|
||||
if (argc==1) {
|
||||
if (NIL_P(arg2)) {
|
||||
from_encidx = rb_enc_get_index(str);
|
||||
from_enc = rb_enc_from_index(from_encidx);
|
||||
from_e = rb_enc_name(from_enc);
|
||||
}
|
||||
else if ((from_encidx = rb_to_encoding_index(from_encval = argv[1])) < 0) {
|
||||
else if ((from_encidx = rb_to_encoding_index(from_encval = arg2)) < 0) {
|
||||
from_enc = 0;
|
||||
from_e = StringValueCStr(from_encval);
|
||||
}
|
||||
|
@ -1747,6 +1738,31 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
from_e = rb_enc_name(from_enc);
|
||||
}
|
||||
|
||||
*sname = from_e;
|
||||
*senc = from_enc;
|
||||
*dname = to_e;
|
||||
*denc = to_enc;
|
||||
return to_encidx;
|
||||
}
|
||||
|
||||
static int
|
||||
str_transcode0(int argc, VALUE *argv, VALUE *self, int options)
|
||||
{
|
||||
VALUE dest;
|
||||
VALUE str = *self;
|
||||
long blen, slen;
|
||||
unsigned char *buf, *bp, *sp;
|
||||
const unsigned char *fromp;
|
||||
rb_encoding *from_enc, *to_enc;
|
||||
const char *from_e, *to_e;
|
||||
int to_encidx;
|
||||
|
||||
if (argc < 1 || argc > 2) {
|
||||
rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
|
||||
}
|
||||
|
||||
to_encidx = str_transcode_enc_args(str, argv[0], argc==1 ? Qnil : argv[1], &from_e, &from_enc, &to_e, &to_enc);
|
||||
|
||||
if (from_enc && from_enc == to_enc) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -1782,6 +1798,22 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
return to_encidx;
|
||||
}
|
||||
|
||||
static int
|
||||
str_transcode(int argc, VALUE *argv, VALUE *self)
|
||||
{
|
||||
VALUE opt;
|
||||
int options = 0;
|
||||
|
||||
if (0 < argc) {
|
||||
opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
|
||||
if (!NIL_P(opt)) {
|
||||
argc--;
|
||||
options = econv_opts(opt);
|
||||
}
|
||||
}
|
||||
return str_transcode0(argc, argv, self, options);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
str_encode_associate(VALUE str, int encidx)
|
||||
{
|
||||
|
@ -1850,9 +1882,16 @@ str_encode(int argc, VALUE *argv, VALUE str)
|
|||
}
|
||||
|
||||
VALUE
|
||||
rb_str_transcode(VALUE str, VALUE to)
|
||||
rb_str_transcode(VALUE str, VALUE to, int flags)
|
||||
{
|
||||
return str_encode(1, &to, str);
|
||||
int argc = 1;
|
||||
VALUE *argv = &to;
|
||||
VALUE newstr = str;
|
||||
int encidx = str_transcode0(argc, argv, &newstr, flags);
|
||||
|
||||
if (encidx < 0) return rb_str_dup(str);
|
||||
RBASIC(newstr)->klass = rb_obj_class(str);
|
||||
return str_encode_associate(newstr, encidx);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2305,7 +2344,7 @@ econv_primitive_insert_output(VALUE self, VALUE string)
|
|||
|
||||
StringValue(string);
|
||||
insert_enc = rb_econv_encoding_to_insert_output(ec);
|
||||
string = rb_str_transcode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)));
|
||||
string = rb_str_transcode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)), 0);
|
||||
|
||||
ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc);
|
||||
if (ret == -1)
|
||||
|
|
Загрузка…
Ссылка в новой задаче