diff --git a/encoding.c b/encoding.c index d4fe6ea124..e6b49ef145 100644 --- a/encoding.c +++ b/encoding.c @@ -967,6 +967,21 @@ enc_set_index(VALUE obj, int idx) rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); } +void +rb_enc_raw_set(VALUE obj, rb_encoding *enc) +{ + RUBY_ASSERT(enc_capable(obj)); + + int idx = enc ? ENC_TO_ENCINDEX(enc) : 0; + + if (idx < ENCODING_INLINE_MAX) { + ENCODING_SET_INLINED(obj, idx); + return; + } + ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX); + rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); +} + void rb_enc_set_index(VALUE obj, int idx) { diff --git a/internal/encoding.h b/internal/encoding.h index fe9ea10ec4..29d7dc5c2d 100644 --- a/internal/encoding.h +++ b/internal/encoding.h @@ -28,6 +28,8 @@ int rb_encdb_dummy(const char *name); void rb_encdb_declare(const char *name); void rb_enc_set_base(const char *name, const char *orig); int rb_enc_set_dummy(int index); +void rb_enc_raw_set(VALUE obj, rb_encoding *enc); + PUREFUNC(int rb_data_is_encoding(VALUE obj)); /* vm.c */ diff --git a/string.c b/string.c index 9f54289dae..002d55d6a0 100644 --- a/string.c +++ b/string.c @@ -988,7 +988,7 @@ empty_str_alloc(VALUE klass) } static VALUE -str_new0(VALUE klass, const char *ptr, long len, int termlen) +str_enc_new(VALUE klass, const char *ptr, long len, rb_encoding *enc) { VALUE str; @@ -996,12 +996,18 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) rb_raise(rb_eArgError, "negative string size (or size too big)"); } + if (enc == NULL) { + enc = rb_ascii8bit_encoding(); + } + RUBY_DTRACE_CREATE_HOOK(STRING, len); + int termlen = rb_enc_mbminlen(enc); + if (STR_EMBEDDABLE_P(len, termlen)) { str = str_alloc_embed(klass, len + termlen); if (len == 0) { - ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + ENC_CODERANGE_SET(str, rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID); } } else { @@ -1013,9 +1019,13 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(sizeof(char), len, sizeof(char), termlen); } + + rb_enc_raw_set(str, enc); + if (ptr) { memcpy(RSTRING_PTR(str), ptr, len); } + STR_SET_LEN(str, len); TERM_FILL(RSTRING_PTR(str) + len, termlen); return str; @@ -1024,7 +1034,7 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) static VALUE str_new(VALUE klass, const char *ptr, long len) { - return str_new0(klass, ptr, len, 1); + return str_enc_new(klass, ptr, len, rb_ascii8bit_encoding()); } VALUE @@ -1036,29 +1046,19 @@ rb_str_new(const char *ptr, long len) VALUE rb_usascii_str_new(const char *ptr, long len) { - VALUE str = rb_str_new(ptr, len); - ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); - return str; + return str_enc_new(rb_cString, ptr, len, rb_usascii_encoding()); } VALUE rb_utf8_str_new(const char *ptr, long len) { - VALUE str = str_new(rb_cString, ptr, len); - rb_enc_associate_index(str, rb_utf8_encindex()); - return str; + return str_enc_new(rb_cString, ptr, len, rb_utf8_encoding()); } VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc) { - VALUE str; - - if (!enc) return rb_str_new(ptr, len); - - str = str_new0(rb_cString, ptr, len, rb_enc_mbminlen(enc)); - rb_enc_associate(str, enc); - return str; + return str_enc_new(rb_cString, ptr, len, enc); } VALUE @@ -1076,17 +1076,13 @@ rb_str_new_cstr(const char *ptr) VALUE rb_usascii_str_new_cstr(const char *ptr) { - VALUE str = rb_str_new_cstr(ptr); - ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); - return str; + return rb_enc_str_new_cstr(ptr, rb_usascii_encoding()); } VALUE rb_utf8_str_new_cstr(const char *ptr) { - VALUE str = rb_str_new_cstr(ptr); - rb_enc_associate_index(str, rb_utf8_encindex()); - return str; + return rb_enc_str_new_cstr(ptr, rb_utf8_encoding()); } VALUE @@ -1109,8 +1105,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex) } if (!ptr) { - rb_encoding *enc = rb_enc_get_from_index(encindex); - str = str_new0(klass, ptr, len, rb_enc_mbminlen(enc)); + str = str_enc_new(klass, ptr, len, rb_enc_from_index(encindex)); } else { RUBY_DTRACE_CREATE_HOOK(STRING, len); @@ -1119,8 +1114,8 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex) RSTRING(str)->as.heap.ptr = (char *)ptr; RSTRING(str)->as.heap.aux.capa = len; RBASIC(str)->flags |= STR_NOFREE; + rb_enc_associate_index(str, encindex); } - rb_enc_associate_index(str, encindex); return str; } @@ -1570,10 +1565,11 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) VALUE str; long len = RSTRING_LEN(orig); + rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding(); int termlen = copy_encoding ? TERM_LEN(orig) : 1; if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(len, termlen)) { - str = str_new0(klass, RSTRING_PTR(orig), len, termlen); + str = str_enc_new(klass, RSTRING_PTR(orig), len, enc); RUBY_ASSERT(STR_EMBED_P(str)); } else { @@ -1621,7 +1617,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len) { - return str_new0(rb_obj_class(obj), ptr, len, TERM_LEN(obj)); + return str_enc_new(rb_obj_class(obj), ptr, len, STR_ENC_GET(obj)); } static VALUE @@ -2083,8 +2079,6 @@ rb_str_s_new(int argc, VALUE *argv, VALUE klass) encoding = kwargs[0]; capacity = kwargs[1]; - int termlen = 1; - if (n == 1) { orig = StringValue(orig); } @@ -2100,7 +2094,6 @@ rb_str_s_new(int argc, VALUE *argv, VALUE klass) if (!UNDEF_P(encoding)) { enc = rb_to_encoding(encoding); - termlen = rb_enc_mbminlen(enc); } // If capacity is nil, we're basically just duping `orig`. @@ -2131,13 +2124,9 @@ rb_str_s_new(int argc, VALUE *argv, VALUE klass) } } - VALUE str = str_new0(klass, NULL, capa, termlen); + VALUE str = str_enc_new(klass, NULL, capa, enc); STR_SET_LEN(str, 0); - TERM_FILL(RSTRING_PTR(str), termlen); - - if (enc) { - rb_enc_associate(str, enc); - } + TERM_FILL(RSTRING_PTR(str), enc ? rb_enc_mbmaxlen(enc) : 1); if (!NIL_P(orig)) { rb_str_buf_append(str, orig); @@ -2426,7 +2415,7 @@ rb_str_plus(VALUE str1, VALUE str2) if (len1 > LONG_MAX - len2) { rb_raise(rb_eArgError, "string size too big"); } - str3 = str_new0(rb_cString, 0, len1+len2, termlen); + str3 = str_enc_new(rb_cString, 0, len1+len2, enc); ptr3 = RSTRING_PTR(str3); memcpy(ptr3, ptr1, len1); memcpy(ptr3+len1, ptr2, len2); @@ -2521,7 +2510,7 @@ rb_str_times(VALUE str, VALUE times) len *= RSTRING_LEN(str); termlen = TERM_LEN(str); - str2 = str_new0(rb_cString, 0, len, termlen); + str2 = str_enc_new(rb_cString, 0, len, STR_ENC_GET(str)); ptr2 = RSTRING_PTR(str2); if (len) { n = RSTRING_LEN(str); @@ -10887,7 +10876,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag) rb_raise(rb_eArgError, "argument too big"); } len += size; - res = str_new0(rb_cString, 0, len, termlen); + res = str_enc_new(rb_cString, 0, len, enc); p = RSTRING_PTR(res); if (flen <= 1) { memset(p, *f, llen); @@ -10923,7 +10912,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag) } TERM_FILL(p, termlen); STR_SET_LEN(res, p-RSTRING_PTR(res)); - rb_enc_associate(res, enc); + if (argc == 2) cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad)); if (cr != ENC_CODERANGE_BROKEN)