* include/ruby/encoding.h (rb_str_transcode): add ecflags argument.

* transcode.c (econv_opts): extracted from str_transcode.
  (str_transcode_enc_args): extracted from str_transcode.
  (str_transcode0): extracted from str_transcode.
  (str_transcode): use econv_opts, str_transcode_enc_args,
  str_transcode0.
  (rb_str_transcode): call str_transcode0.
  (econv_primitive_insert_output): give the additional argument for
  rb_str_transcode.

* io.c (make_writeconv): use invalid/undef flags.
  (io_fwrite): ditto.
  (rb_scan_open_args): give the additional argument for
  rb_str_transcode.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18808 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-08-24 06:25:24 +00:00
Родитель 51ad3df2ce
Коммит c6d291b003
5 изменённых файлов: 198 добавлений и 59 удалений

Просмотреть файл

@ -1,3 +1,21 @@
Sun Aug 24 15:21:28 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/encoding.h (rb_str_transcode): add ecflags argument.
* transcode.c (econv_opts): extracted from str_transcode.
(str_transcode_enc_args): extracted from str_transcode.
(str_transcode0): extracted from str_transcode.
(str_transcode): use econv_opts, str_transcode_enc_args,
str_transcode0.
(rb_str_transcode): call str_transcode0.
(econv_primitive_insert_output): give the additional argument for
rb_str_transcode.
* io.c (make_writeconv): use invalid/undef flags.
(io_fwrite): ditto.
(rb_scan_open_args): give the additional argument for
rb_str_transcode.
Sun Aug 24 13:27:42 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (str_transcode): check last hash only if 0 < argc.

Просмотреть файл

@ -194,7 +194,7 @@ rb_enc_dummy_p(rb_encoding *enc)
return ENC_DUMMY_P(enc) != 0;
}
VALUE rb_str_transcode(VALUE str, VALUE to);
VALUE rb_str_transcode(VALUE str, VALUE to, int ecflags);
/* econv stuff */

33
io.c
Просмотреть файл

@ -699,6 +699,12 @@ make_writeconv(rb_io_t *fptr)
fptr->writeconv_initialized = 1;
ecflags = 0;
if (fptr->mode & FMODE_INVALID_MASK)
ecflags |= (fptr->mode / (FMODE_INVALID_MASK/ECONV_INVALID_MASK)) & ECONV_INVALID_MASK;
if (fptr->mode & FMODE_UNDEF_MASK)
ecflags |= (fptr->mode / (FMODE_UNDEF_MASK/ECONV_UNDEF_MASK)) & ECONV_UNDEF_MASK;
#ifdef TEXTMODE_NEWLINE_ENCODER
if (NEED_NEWLINE_ENCODER(fptr))
ecflags |= TEXTMODE_NEWLINE_ENCODER;
@ -740,18 +746,31 @@ io_fwrite(VALUE str, rb_io_t *fptr)
long len, n, r, l, offset = 0;
if (NEED_WRITECONV(fptr)) {
VALUE common_encoding = Qnil;
make_writeconv(fptr);
if (fptr->writeconv) {
if (!NIL_P(fptr->writeconv_stateless)) {
str = rb_str_transcode(str, fptr->writeconv_stateless);
}
str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT);
if (!NIL_P(fptr->writeconv_stateless))
common_encoding = fptr->writeconv_stateless;
}
else {
if (fptr->enc2)
str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc2));
common_encoding = rb_enc_from_encoding(fptr->enc2);
else
str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc));
common_encoding = rb_enc_from_encoding(fptr->enc);
}
if (!NIL_P(common_encoding)) {
int ecflags = 0;
if (fptr->mode & FMODE_INVALID_MASK)
ecflags |= (fptr->mode / (FMODE_INVALID_MASK/ECONV_INVALID_MASK)) & ECONV_INVALID_MASK;
if (fptr->mode & FMODE_UNDEF_MASK)
ecflags |= (fptr->mode / (FMODE_UNDEF_MASK/ECONV_UNDEF_MASK)) & ECONV_UNDEF_MASK;
str = rb_str_transcode(str, common_encoding, ecflags);
}
if (fptr->writeconv) {
str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT);
}
}
@ -4622,7 +4641,7 @@ rb_scan_open_args(int argc, VALUE *argv,
static VALUE fs_enc;
if (!fs_enc)
fs_enc = rb_enc_from_encoding(fs_encoding);
fname = rb_str_transcode(fname, fs_enc);
fname = rb_str_transcode(fname, fs_enc, 0);
}
}
#endif

Просмотреть файл

@ -1202,5 +1202,68 @@ EOT
}
}
end
# Writes a string containing an invalid UTF-8 byte to a file opened as
# "w:euc-jp".  With :invalid => :replace / :ignore the write must succeed
# and the file must hold the replaced / stripped text; with only :undef
# given, the write must raise Encoding::InvalidByteSequence.
def test_invalid_w
  with_tmpdir do
    broken = "a\x80b".force_encoding("utf-8")

    # [policy for :invalid, expected file content]
    [[:replace, "a?b"], [:ignore, "ab"]].each do |policy, expected|
      open("t.txt", "w:euc-jp", :invalid => policy) do |io|
        assert_nothing_raised { io.write broken }
      end
      assert_equal(expected, File.read("t.txt"))
    end

    # :undef alone does not cover invalid byte sequences.
    [:replace, :ignore].each do |policy|
      open("t.txt", "w:euc-jp", :undef => policy) do |io|
        assert_raise(Encoding::InvalidByteSequence) { io.write broken }
      end
    end
  end
end
# Writes U+FFFD to a file opened as "w:euc-jp:utf-8".  With
# :undef => :replace / :ignore the write must succeed and the file must
# hold the replaced / stripped text; with only :invalid given, the write
# must raise Encoding::ConversionUndefined.
def test_undef_w_stateless
  with_tmpdir do
    generate_file("t.txt", "a\uFFFDb")
    mode = "w:euc-jp:utf-8"

    # [policy for :undef, expected file content]
    [[:replace, "a?b"], [:ignore, "ab"]].each do |policy, expected|
      open("t.txt", mode, :undef => policy) do |io|
        assert_nothing_raised { io.write "a\uFFFDb" }
      end
      assert_equal(expected, File.read("t.txt"))
    end

    # :invalid alone does not cover undefined conversions.
    [:replace, :ignore].each do |policy|
      open("t.txt", mode, :invalid => policy) do |io|
        assert_raise(Encoding::ConversionUndefined) { io.write "a\uFFFDb" }
      end
    end
  end
end
# Same scenario as the euc-jp variant, but the file is opened as
# "w:iso-2022-jp:utf-8".  With :undef => :replace / :ignore the write
# must succeed and the file must hold the replaced / stripped text; with
# only :invalid given, the write must raise Encoding::ConversionUndefined.
def test_undef_w_stateful
  with_tmpdir do
    generate_file("t.txt", "a\uFFFDb")
    mode = "w:iso-2022-jp:utf-8"

    # [policy for :undef, expected file content]
    [[:replace, "a?b"], [:ignore, "ab"]].each do |policy, expected|
      open("t.txt", mode, :undef => policy) do |io|
        assert_nothing_raised { io.write "a\uFFFDb" }
      end
      assert_equal(expected, File.read("t.txt"))
    end

    # :invalid alone does not cover undefined conversions.
    [:replace, :ignore].each do |policy|
      open("t.txt", mode, :invalid => policy) do |io|
        assert_raise(Encoding::ConversionUndefined) { io.write "a\uFFFDb" }
      end
    end
  end
end
end

Просмотреть файл

@ -1673,58 +1673,49 @@ str_transcoding_resize(VALUE destination, int len, int new_len)
}
/*
 * econv_opts: translate an options hash into ECONV_* flag bits.
 *
 * Looks up the sym_invalid and sym_undef keys of +opt+:
 *   - nil (key absent)  -> no flag is set for that category
 *   - sym_ignore        -> ECONV_INVALID_IGNORE / ECONV_UNDEF_IGNORE
 *   - sym_replace       -> ECONV_INVALID_REPLACE / ECONV_UNDEF_REPLACE
 *   - anything else     -> raises ArgumentError
 *
 * Returns the bitwise OR of the selected flags (0 when neither key is set).
 */
static int
str_transcode(int argc, VALUE *argv, VALUE *self)
econv_opts(VALUE opt)
{
VALUE v;
int options = 0;
/* --- handling of invalid byte sequences --- */
v = rb_hash_aref(opt, sym_invalid);
if (NIL_P(v)) {
/* option not given: leave the flags untouched */
}
else if (v==sym_ignore) {
options |= ECONV_INVALID_IGNORE;
}
else if (v==sym_replace) {
options |= ECONV_INVALID_REPLACE;
/* NOTE(review): the value fetched here is overwritten by the sym_undef
 * lookup below before it is ever read -- looks like a placeholder for a
 * replacement-string option; confirm intent. */
v = rb_hash_aref(opt, sym_replace);
}
else {
rb_raise(rb_eArgError, "unknown value for invalid character option");
}
/* --- handling of characters undefined in the destination --- */
v = rb_hash_aref(opt, sym_undef);
if (NIL_P(v)) {
/* option not given: leave the flags untouched */
}
else if (v==sym_ignore) {
options |= ECONV_UNDEF_IGNORE;
}
else if (v==sym_replace) {
options |= ECONV_UNDEF_REPLACE;
}
else {
rb_raise(rb_eArgError, "unknown value for undefined character option");
}
return options;
}
static int
str_transcode_enc_args(VALUE str, VALUE arg1, VALUE arg2,
const char **sname, rb_encoding **senc,
const char **dname, rb_encoding **denc)
{
VALUE dest;
VALUE str = *self;
long blen, slen;
unsigned char *buf, *bp, *sp;
const unsigned char *fromp;
rb_encoding *from_enc, *to_enc;
const char *from_e, *to_e;
int from_encidx, to_encidx;
VALUE from_encval, to_encval;
VALUE opt;
int options = 0;
if (0 < argc)
opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
else
opt = Qnil;
if (!NIL_P(opt)) {
VALUE v;
argc--;
v = rb_hash_aref(opt, sym_invalid);
if (NIL_P(v)) {
}
else if (v==sym_ignore) {
options |= ECONV_INVALID_IGNORE;
}
else if (v==sym_replace) {
options |= ECONV_INVALID_REPLACE;
v = rb_hash_aref(opt, sym_replace);
}
else {
rb_raise(rb_eArgError, "unknown value for invalid character option");
}
v = rb_hash_aref(opt, sym_undef);
if (NIL_P(v)) {
}
else if (v==sym_ignore) {
options |= ECONV_UNDEF_IGNORE;
}
else if (v==sym_replace) {
options |= ECONV_UNDEF_REPLACE;
}
else {
rb_raise(rb_eArgError, "unknown value for undefined character option");
}
}
if (argc < 1 || argc > 2) {
rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
}
if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
if ((to_encidx = rb_to_encoding_index(to_encval = arg1)) < 0) {
to_enc = 0;
to_encidx = 0;
to_e = StringValueCStr(to_encval);
@ -1733,12 +1724,12 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
to_enc = rb_enc_from_index(to_encidx);
to_e = rb_enc_name(to_enc);
}
if (argc==1) {
if (NIL_P(arg2)) {
from_encidx = rb_enc_get_index(str);
from_enc = rb_enc_from_index(from_encidx);
from_e = rb_enc_name(from_enc);
}
else if ((from_encidx = rb_to_encoding_index(from_encval = argv[1])) < 0) {
else if ((from_encidx = rb_to_encoding_index(from_encval = arg2)) < 0) {
from_enc = 0;
from_e = StringValueCStr(from_encval);
}
@ -1747,6 +1738,31 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
from_e = rb_enc_name(from_enc);
}
*sname = from_e;
*senc = from_enc;
*dname = to_e;
*denc = to_enc;
return to_encidx;
}
static int
str_transcode0(int argc, VALUE *argv, VALUE *self, int options)
{
VALUE dest;
VALUE str = *self;
long blen, slen;
unsigned char *buf, *bp, *sp;
const unsigned char *fromp;
rb_encoding *from_enc, *to_enc;
const char *from_e, *to_e;
int to_encidx;
if (argc < 1 || argc > 2) {
rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
}
to_encidx = str_transcode_enc_args(str, argv[0], argc==1 ? Qnil : argv[1], &from_e, &from_enc, &to_e, &to_enc);
if (from_enc && from_enc == to_enc) {
return -1;
}
@ -1782,6 +1798,22 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
return to_encidx;
}
/*
 * str_transcode: argument-list front end for str_transcode0.
 *
 * If at least one argument is present and the last one converts to a Hash
 * (via to_hash), that argument is removed from the count and turned into
 * conversion flags by econv_opts.  The remaining arguments, together with
 * the flags (0 when no hash was given), are handed to str_transcode0,
 * whose result is returned unchanged.
 */
static int
str_transcode(int argc, VALUE *argv, VALUE *self)
{
    int ecflags = 0;

    if (argc > 0) {
        VALUE hash = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");

        if (!NIL_P(hash)) {
            /* trailing options hash is not an encoding argument */
            ecflags = econv_opts(hash);
            argc--;
        }
    }

    return str_transcode0(argc, argv, self, ecflags);
}
static inline VALUE
str_encode_associate(VALUE str, int encidx)
{
@ -1850,9 +1882,16 @@ str_encode(int argc, VALUE *argv, VALUE str)
}
/*
 * rb_str_transcode: transcode +str+ to the encoding designated by +to+.
 *
 * +flags+ is forwarded to str_transcode0 as its options argument.
 * When str_transcode0 returns a negative index (it does so, for example,
 * when source and destination encodings are the same), a duplicate of
 * +str+ is returned.  Otherwise the converted string is given the class
 * of +str+ and passed through str_encode_associate with the resulting
 * encoding index.
 */
VALUE
rb_str_transcode(VALUE str, VALUE to)
rb_str_transcode(VALUE str, VALUE to, int flags)
{
return str_encode(1, &to, str);
/* a single positional argument: the destination encoding */
int argc = 1;
VALUE *argv = &to;
/* str_transcode0 receives the string by pointer (its +self+ parameter) */
VALUE newstr = str;
int encidx = str_transcode0(argc, argv, &newstr, flags);
/* negative index: hand back a copy of the input instead */
if (encidx < 0) return rb_str_dup(str);
/* give the result the same class as the receiver */
RBASIC(newstr)->klass = rb_obj_class(str);
return str_encode_associate(newstr, encidx);
}
static void
@ -2305,7 +2344,7 @@ econv_primitive_insert_output(VALUE self, VALUE string)
StringValue(string);
insert_enc = rb_econv_encoding_to_insert_output(ec);
string = rb_str_transcode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)));
string = rb_str_transcode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)), 0);
ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc);
if (ret == -1)