From 6e0fed271c1d2e6f2b13b99d89d43e7d00e81472 Mon Sep 17 00:00:00 2001 From: matz Date: Fri, 2 Feb 2007 13:19:44 +0000 Subject: [PATCH] * ruby.h (SYMBOL_P): make Symbol immediate again for performance. * string.c: redesign symbol methods. * parse.y (rb_id2str): store Strings for operator symbols. [ruby-dev:30235] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@11615 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 11 +++ compile.c | 3 +- parse.y | 88 +++++++++------------- ruby.h | 9 +-- string.c | 216 ++++++++++++++++++++++++++---------------------------- 5 files changed, 153 insertions(+), 174 deletions(-) diff --git a/ChangeLog b/ChangeLog index c484b395f1..a98c8f6ac8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -12,6 +12,17 @@ Fri Feb 2 18:27:54 2007 Yukihiro Matsumoto * eval.c: remove duplicated global variables rb_cProc and rb_cBinding. [ruby-dev:30242] +Fri Feb 2 00:13:44 2007 Yukihiro Matsumoto + + * ruby.h (SYMBOL_P): make Symbol immediate again for performance. + + * string.c: redesign symbol methods. + +Thu Feb 1 23:25:21 2007 Nobuyoshi Nakada + + * parse.y (rb_id2str): store Strings for operator symbols. + [ruby-dev:30235] + Thu Feb 1 21:04:39 2007 Yukihiro Matsumoto * parse.y (assignable_gen): no need to generate NODE_CVDECL. diff --git a/compile.c b/compile.c index c8ddb2f80f..dc2765e92d 100644 --- a/compile.c +++ b/compile.c @@ -2065,8 +2065,7 @@ case_when_optimizable_literal(NODE * node) { if (nd_type(node) == NODE_LIT) { VALUE v = node->nd_lit; - VALUE klass = CLASS_OF(v); - if (klass == rb_cSymbol || rb_obj_is_kind_of(v, rb_cNumeric)) { + if (SYMBOL_P(v) || rb_obj_is_kind_of(v, rb_cNumeric)) { return v; } } diff --git a/parse.y b/parse.y index 1d93b5cd08..a08d429739 100644 --- a/parse.y +++ b/parse.y @@ -8318,7 +8318,8 @@ static const struct { static struct symbols { ID last_id; st_table *sym_id; - st_table *id_sym; + st_table *id_str; + VALUE op_sym[tLAST_TOKEN]; } global_symbols = {tLAST_TOKEN}; static struct st_hash_type symhash = { @@ -8330,13 +8331,15 @@ void Init_sym(void) { global_symbols.sym_id = st_init_table_with_size(&symhash, 1000); - global_symbols.id_sym = st_init_numtable_with_size(1000); + global_symbols.id_str = st_init_numtable_with_size(1000); } void rb_gc_mark_symbols(void) { - rb_mark_tbl(global_symbols.id_sym); + rb_mark_tbl(global_symbols.id_str); + rb_gc_mark_locations(global_symbols.op_sym, + global_symbols.op_sym + tLAST_TOKEN); } static ID @@ -8440,26 +8443,15 @@ rb_symname_p(const char *name) return *m ? Qfalse : Qtrue; } -int -rb_sym_interned_p(str) - VALUE str; -{ - ID id; - - if (st_lookup(global_symbols.sym_id, (st_data_t)str, (st_data_t *)&id)) - return Qtrue; - return Qfalse; -} - ID rb_intern2(const char *name, long len) { const char *m = name; - VALUE sym = rb_str_new(name, len); + VALUE str = rb_str_new(name, len); ID id; int last; - if (st_lookup(global_symbols.sym_id, (st_data_t)sym, (st_data_t *)&id)) + if (st_lookup(global_symbols.sym_id, (st_data_t)str, (st_data_t *)&id)) return id; last = len-1; @@ -8523,10 +8515,9 @@ rb_intern2(const char *name, long len) new_id: id |= ++global_symbols.last_id << ID_SCOPE_SHIFT; id_register: - RBASIC(sym)->klass = rb_cSymbol; - OBJ_FREEZE(sym); - st_add_direct(global_symbols.sym_id, (st_data_t)sym, id); - st_add_direct(global_symbols.id_sym, id, (st_data_t)sym); + OBJ_FREEZE(str); + st_add_direct(global_symbols.sym_id, (st_data_t)str, id); + st_add_direct(global_symbols.id_str, id, (st_data_t)str); return id; } @@ -8537,31 +8528,7 @@ rb_intern(const char *name) } VALUE -rb_id2sym(ID id) -{ - VALUE data; - - while (!st_lookup(global_symbols.id_sym, id, &data)) { - rb_id2name(id); - } - if (!RBASIC(data)->klass) { - RBASIC(data)->klass = rb_cSymbol; - } - return data; -} - -ID -rb_sym2id(VALUE sym) -{ - ID data; - - if (st_lookup(global_symbols.sym_id, sym, &data)) - return data; - return rb_intern2(RSTRING_PTR(sym), RSTRING_LEN(sym)); -} - -const char * -rb_id2name(ID id) +rb_id2str(ID id) { const char *name; st_data_t data; @@ -8570,13 +8537,20 @@ rb_id2name(ID id) int i = 0; for (i=0; op_tbl[i].token; i++) { - if (op_tbl[i].token == id) - return op_tbl[i].name; + if (op_tbl[i].token == id) { + VALUE str = global_symbols.op_sym[i]; + if (!str) { + str = rb_str_new2(op_tbl[i].name); + OBJ_FREEZE(str); + global_symbols.op_sym[i] = str; + } + return str; + } } } - if (st_lookup(global_symbols.id_sym, id, &data)) - return RSTRING_PTR(data); + if (st_lookup(global_symbols.id_str, id, &data)) + return (VALUE)data; if (is_attrset_id(id)) { ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL; @@ -8589,7 +8563,7 @@ rb_id2name(ID id) strcpy(buf, name); strcat(buf, "="); rb_intern(buf); - return rb_id2name(id); + return rb_id2str(id); } if (is_local_id(id2)) { id2 = (id & ~ID_SCOPE_MASK) | ID_CONST; @@ -8599,13 +8573,19 @@ rb_id2name(ID id) return 0; } +const char * +rb_id2name(ID id) +{ + VALUE str = rb_id2str(id); + + if (!str) return 0; + return RSTRING_PTR(str); +} + static int symbols_i(VALUE sym, ID value, VALUE ary) { - if (!RBASIC(sym)->klass) { - RBASIC(sym)->klass = rb_cSymbol; - } - rb_ary_push(ary, sym); + rb_ary_push(ary, ID2SYM(value)); return ST_CONTINUE; } diff --git a/ruby.h b/ruby.h index cf303f8e3c..e5b4a69697 100644 --- a/ruby.h +++ b/ruby.h @@ -197,11 +197,10 @@ VALUE rb_ull2inum(unsigned LONG_LONG); #define IMMEDIATE_MASK 0x03 #define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK) -#define SYMBOL_P(x) (!SPECIAL_CONST_P(x) && RBASIC(x)->klass == rb_cSymbol) -VALUE rb_id2sym(ID); -ID rb_sym2id(VALUE); -#define ID2SYM(x) rb_id2sym(x) -#define SYM2ID(x) rb_sym2id(x) +#define SYMBOL_FLAG 0x0e +#define SYMBOL_P(x) (((VALUE)(x)&0xff)==SYMBOL_FLAG) +#define ID2SYM(x) ((VALUE)(((long)(x))<<8|SYMBOL_FLAG)) +#define SYM2ID(x) RSHIFT((unsigned long)x,8) /* special contants - i.e. non-zero and non-fixnum constants */ #define Qfalse ((VALUE)0) diff --git a/string.c b/string.c index 7a27af8d77..faacf95428 100644 --- a/string.c +++ b/string.c @@ -4227,7 +4227,7 @@ rb_str_intern(VALUE s) volatile VALUE str = s; ID id; - if (OBJ_TAINTED(str) && rb_safe_level() >= 1 && !rb_sym_interned_p(str)) { + if (OBJ_TAINTED(str) && rb_safe_level() >= 1) { rb_raise(rb_eSecurityError, "Insecure: can't intern tainted string"); } id = rb_intern2(RSTRING_PTR(str), RSTRING_LEN(str)); @@ -4631,7 +4631,7 @@ rb_str_setter(VALUE val, ID id, VALUE *var) static VALUE rb_sym_s_intern(VALUE s) { - if (rb_class_real(s) == rb_cSymbol) { + if (SYMBOL_P(s)) { return s; } StringValue(s); @@ -4651,48 +4651,7 @@ static VALUE sym_equal(VALUE sym1, VALUE sym2) { if (sym1 == sym2) return Qtrue; - if (SYMBOL_P(sym2)) return Qfalse; - return rb_str_equal(sym1, sym2); -} - -/* - * call-seq: - * sym.eql?(other) => true or false - * - * Two symbols are equal if they are exactly same symbols. - */ - -static VALUE -sym_eql(VALUE sym1, VALUE sym2) -{ - if (sym1 == sym2) return Qtrue; - if (SYMBOL_P(sym2)) return Qfalse; - return rb_str_eql(sym1, sym2); -} - -/* - * call-seq: - * sym.hash => fixnum - * - * Return a hash based on the symbol's length and content. - */ -static VALUE -sym_hash(VALUE sym) -{ - int h; - VALUE hval; - - if (STR_SHARED_P(sym)) { - /* if a symbol has shared value, that's a hash value. */ - return RSTRING(sym)->as.heap.aux.shared; - } - h = rb_str_hash(sym); - hval = INT2FIX(h); - if (!STR_EMBED_P(sym)) { - FL_SET(sym, STR_ASSOC); - RSTRING(sym)->as.heap.aux.shared = hval; - } - return hval; + return Qfalse; } @@ -4728,8 +4687,10 @@ sym_to_i(VALUE sym) static VALUE sym_inspect(VALUE sym) { - VALUE str; + VALUE str, klass = Qundef; + ID id = SYM2ID(sym); + sym = rb_id2str(id); str = rb_str_new(0, RSTRING_LEN(sym)+1); RSTRING_PTR(str)[0] = ':'; memcpy(RSTRING_PTR(str)+1, RSTRING_PTR(sym), RSTRING_LEN(sym)); @@ -4738,6 +4699,10 @@ sym_inspect(VALUE sym) str = rb_str_dump(str); strncpy(RSTRING_PTR(str), ":\"", 2); } + if (klass != Qundef) { + rb_str_cat2(str, "/"); + rb_str_append(str, rb_inspect(klass)); + } return str; } @@ -4753,10 +4718,12 @@ sym_inspect(VALUE sym) */ -static VALUE -sym_to_s(VALUE sym) +VALUE +rb_sym_to_s(VALUE sym) { - return rb_str_new(RSTRING_PTR(sym), RSTRING_LEN(sym)); + ID id = SYM2ID(sym); + + return str_new3(rb_cString, rb_id2str(id)); } @@ -4809,14 +4776,73 @@ sym_to_proc(VALUE sym) static VALUE sym_succ(VALUE sym) { - return rb_str_intern(rb_str_succ(sym)); + return rb_str_intern(rb_str_succ(rb_sym_to_s(sym))); } -static ID -str_to_id(VALUE str) +static VALUE +sym_cmp(VALUE sym, VALUE other) { - VALUE sym = rb_str_intern(str); - return SYM2ID(sym); + if (!SYMBOL_P(other)) { + return Qnil; + } + return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other)); +} + +static VALUE +sym_casecmp(VALUE sym, VALUE other) +{ + if (!SYMBOL_P(other)) { + return Qnil; + } + return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other)); +} + +static VALUE +sym_match(VALUE sym, VALUE other) +{ + return rb_str_match(rb_sym_to_s(sym), other); +} + +static VALUE +sym_aref(int argc, VALUE *argv, VALUE sym) +{ + return rb_str_aref_m(argc, argv, rb_sym_to_s(sym)); +} + +static VALUE +sym_length(VALUE sym) +{ + return rb_str_length(rb_id2str(SYM2ID(sym))); +} + +static VALUE +sym_empty(VALUE sym) +{ + return rb_str_empty(rb_id2str(SYM2ID(sym))); +} + +static VALUE +sym_upcase(VALUE sym) +{ + return rb_str_intern(rb_str_upcase(rb_id2str(SYM2ID(sym)))); +} + +static VALUE +sym_downcase(VALUE sym) +{ + return rb_str_intern(rb_str_downcase(rb_id2str(SYM2ID(sym)))); +} + +static VALUE +sym_capitalize(VALUE sym) +{ + return rb_str_intern(rb_str_capitalize(rb_id2str(SYM2ID(sym)))); +} + +static VALUE +sym_swapcase(VALUE sym) +{ + return rb_str_intern(rb_str_swapcase(rb_id2str(SYM2ID(sym)))); } ID @@ -4826,24 +4852,18 @@ rb_to_id(VALUE name) ID id; switch (TYPE(name)) { - case T_STRING: - return str_to_id(name); - case T_FIXNUM: - rb_warn("do not use Fixnums as Symbols"); - id = FIX2LONG(name); - if (!rb_id2name(id)) { - rb_raise(rb_eArgError, "%ld is not a symbol", id); - } - break; - case T_SYMBOL: - return SYM2ID(name); - break; default: tmp = rb_check_string_type(name); - if (!NIL_P(tmp)) { - return str_to_id(tmp); + if (NIL_P(tmp)) { + rb_raise(rb_eTypeError, "%s is not a symbol", + RSTRING_PTR(rb_inspect(name))); } - rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING_PTR(rb_inspect(name))); + name = tmp; + case T_STRING: + name = rb_str_intern(name); + /* fall through */ + case T_SYMBOL: + return SYM2ID(name); } return id; } @@ -4989,60 +5009,30 @@ Init_String(void) rb_define_singleton_method(rb_cSymbol, "intern", rb_sym_s_intern, 1); rb_define_method(rb_cSymbol, "==", sym_equal, 1); - rb_define_method(rb_cSymbol, "eql?", sym_eql, 1); - rb_define_method(rb_cSymbol, "hash", sym_hash, 0); rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0); rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0); - rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0); - rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0); + rb_define_method(rb_cSymbol, "to_s", rb_sym_to_s, 0); + rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0); rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0); rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0); rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0); rb_define_method(rb_cSymbol, "succ", sym_succ, 0); rb_define_method(rb_cSymbol, "next", sym_succ, 0); - - rb_define_method(rb_cSymbol, "<=>", rb_str_cmp_m, 1); - rb_define_method(rb_cSymbol, "casecmp", rb_str_casecmp, 1); - rb_define_method(rb_cSymbol, "+", rb_str_plus, 1); - rb_define_method(rb_cSymbol, "*", rb_str_times, 1); - rb_define_method(rb_cSymbol, "%", rb_str_format_m, 1); - rb_define_method(rb_cSymbol, "[]", rb_str_aref_m, -1); - rb_define_method(rb_cSymbol, "length", rb_str_length, 0); - rb_define_method(rb_cSymbol, "size", rb_str_length, 0); - rb_define_method(rb_cSymbol, "empty?", rb_str_empty, 0); - rb_define_method(rb_cSymbol, "=~", rb_str_match, 1); - rb_define_method(rb_cSymbol, "match", rb_str_match_m, -1); - rb_define_method(rb_cSymbol, "index", rb_str_index_m, -1); - rb_define_method(rb_cSymbol, "rindex", rb_str_rindex_m, -1); - rb_define_method(rb_cSymbol, "chr", rb_str_chr, 0); - - rb_define_method(rb_cSymbol, "to_f", rb_str_to_f, 0); - rb_define_method(rb_cSymbol, "to_str", rb_str_to_s, 0); rb_define_method(rb_cSymbol, "dump", rb_str_dump, 0); - rb_define_method(rb_cSymbol, "upcase", rb_str_upcase, 0); - rb_define_method(rb_cSymbol, "downcase", rb_str_downcase, 0); - rb_define_method(rb_cSymbol, "capitalize", rb_str_capitalize, 0); - rb_define_method(rb_cSymbol, "swapcase", rb_str_swapcase, 0); + rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1); + rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1); + rb_define_method(rb_cSymbol, "=~", sym_match, 1); - rb_define_method(rb_cSymbol, "ord", rb_str_ord, 0); + rb_define_method(rb_cSymbol, "[]", sym_aref, -1); + rb_define_method(rb_cSymbol, "slice", sym_aref, -1); + rb_define_method(rb_cSymbol, "length", sym_length, 0); + rb_define_method(rb_cSymbol, "size", sym_length, 0); + rb_define_method(rb_cSymbol, "empty?", sym_empty, 0); + rb_define_method(rb_cSymbol, "match", sym_match, -1); - rb_define_method(rb_cSymbol, "include?", rb_str_include, 1); - rb_define_method(rb_cSymbol, "start_with?", rb_str_start_with, -1); - rb_define_method(rb_cSymbol, "end_with?", rb_str_end_with, -1); - - rb_define_method(rb_cSymbol, "scan", rb_str_scan, 1); - - rb_define_method(rb_cSymbol, "sub", rb_str_sub, -1); - rb_define_method(rb_cSymbol, "gsub", rb_str_gsub, -1); - - rb_define_method(rb_cSymbol, "tr", rb_str_tr, 2); - rb_define_method(rb_cSymbol, "tr_s", rb_str_tr_s, 2); - rb_define_method(rb_cSymbol, "delete", rb_str_delete, -1); - rb_define_method(rb_cSymbol, "squeeze", rb_str_squeeze, -1); - rb_define_method(rb_cSymbol, "count", rb_str_count, -1); - - rb_define_method(rb_cSymbol, "each_byte", rb_str_each_byte, 0); - - rb_define_method(rb_cSymbol, "slice", rb_str_aref_m, -1); + rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0); + rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0); + rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0); + rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0); }