* object.c (Init_Object): move symbol related code to string.c

* string.c (Init_String): Symbol as subclass of String.

* parse.y (rb_intern2): handle symbol as strings.

* string.c (str_new): substrings of symbols are mere strings, not
  symbols.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10834 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2006-09-02 15:05:27 +00:00
Родитель 2156870525
Коммит ccf5372b25
7 изменённых файлов: 301 добавлений и 219 удалений

Просмотреть файл

@ -1,3 +1,14 @@
Sat Sep 2 23:53:28 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
* object.c (Init_Object): move symbol related code to string.c
* string.c (Init_String): Symbol as subclass of String.
* parse.y (rb_intern2): handle symbol as strings.
* string.c (str_new): substrings of symbols are mere strings, not
symbols.
Sat Sep 2 23:37:29 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
* ruby.h (struct RArray): embed small arrays.
@ -17,6 +28,13 @@ Sat Sep 2 12:06:35 2006 NAKAMURA, Hiroshi <nahi@ruby-lang.org>
XML attribute which value is nil. value "" and nil both were dumped
as 'attr="value"'. [ruby-dev:29395]
Sat Sep 2 11:47:58 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
* eval.c (rb_eval): should handle when in else clause. a patch
from Eric Hodel <drbrain at segment7.net>. [ruby-core:08662]
* parse.y (primary): wrap with NODE_CASE. [ruby-core:08663]
Sat Sep 2 12:00:32 2006 NAKAMURA, Hiroshi <nahi@ruby-lang.org>
* lib/csv.rb (CSV::IOReader#initialize): use String#[](pos, len)

Просмотреть файл

@ -489,7 +489,7 @@ ins_methods_push(ID name, long type, VALUE ary, long visi)
break;
}
if (visi) {
rb_ary_push(ary, rb_str_new2(rb_id2name(name)));
rb_ary_push(ary, ID2SYM(name));
}
return ST_CONTINUE;
}

1
gc.c
Просмотреть файл

@ -1381,6 +1381,7 @@ garbage_collect(void)
(VALUE*)((char*)rb_gc_stack_start + 2));
#endif
rb_gc_mark_threads();
rb_gc_mark_symbols();
/* mark protected global variables */
for (list = global_List; list; list = list->next) {

198
object.c
Просмотреть файл

@ -30,7 +30,6 @@ VALUE rb_cData;
VALUE rb_cNilClass;
VALUE rb_cTrueClass;
VALUE rb_cFalseClass;
VALUE rb_cSymbol;
static ID id_eq, id_eql, id_inspect, id_init_copy;
@ -938,149 +937,6 @@ rb_obj_pattern_match(VALUE obj1, VALUE obj2)
return Qnil;
}
/**********************************************************************
* Document-class: Symbol
*
* <code>Symbol</code> objects represent names and some strings
* inside the Ruby
* interpreter. They are generated using the <code>:name</code> and
* <code>:"string"</code> literals
* syntax, and by the various <code>to_sym</code> methods. The same
* <code>Symbol</code> object will be created for a given name or string
* for the duration of a program's execution, regardless of the context
* or meaning of that name. Thus if <code>Fred</code> is a constant in
* one context, a method in another, and a class in a third, the
* <code>Symbol</code> <code>:Fred</code> will be the same object in
* all three contexts.
*
* module One
* class Fred
* end
* $f1 = :Fred
* end
* module Two
* Fred = 1
* $f2 = :Fred
* end
* def Fred()
* end
* $f3 = :Fred
* $f1.id #=> 2514190
* $f2.id #=> 2514190
* $f3.id #=> 2514190
*
*/
/*
 *  call-seq:
 *     sym.to_i      => fixnum
 *
 *  Converts <i>sym</i> to its ID with SYM2ID and returns that ID as a
 *  Fixnum. The same symbol yields the same integer for the whole run of
 *  a program.
 *
 *     :fred.to_i           #=> 9809
 *     "fred".to_sym.to_i   #=> 9809
 */

static VALUE
sym_to_i(VALUE sym)
{
    return LONG2FIX(SYM2ID(sym));
}
/*
 *  call-seq:
 *     sym.inspect    => string
 *
 *  Returns <i>sym</i> written as a symbol literal: a colon followed by
 *  the symbol's name. When rb_symname_p() rejects the name, the text is
 *  passed through rb_str_dump() and emitted in the <code>:"..."</code>
 *  form instead.
 *
 *     :fred.inspect   #=> ":fred"
 */

static VALUE
sym_inspect(VALUE sym)
{
    const char *name = rb_id2name(SYM2ID(sym));
    size_t namelen = strlen(name);
    VALUE result = rb_str_new(0, namelen + 1);
    char *buf = RSTRING_PTR(result);

    buf[0] = ':';
    /* namelen + 1: the terminating NUL is copied along with the name */
    memcpy(buf + 1, name, namelen + 1);

    if (rb_symname_p(name)) {
        return result;
    }

    /* Presumably rb_str_dump() returns a quoted, escaped copy that starts
     * with "\":"; its first two bytes are overwritten to read :" instead. */
    result = rb_str_dump(result);
    buf = RSTRING_PTR(result);
    buf[0] = ':';
    buf[1] = '"';
    return result;
}
/*
 *  call-seq:
 *     sym.id2name   => string
 *     sym.to_s      => string
 *
 *  Looks up the name registered for <i>sym</i>'s ID and returns it as a
 *  newly created string.
 *
 *     :fred.id2name   #=> "fred"
 */

static VALUE
sym_to_s(VALUE sym)
{
    const char *name = rb_id2name(SYM2ID(sym));

    return rb_str_new2(name);
}
/*
* call-seq:
* sym.to_sym => sym
*
* Returns <i>sym</i> itself. Elsewhere <code>to_sym</code> yields the
* <code>Symbol</code> corresponding to an object; the receiver here is
* already a symbol, so it is handed back without any conversion.
*/
static VALUE
sym_to_sym(VALUE sym)
{
return sym;
}
/*
 * Block body installed by sym_to_proc() through rb_proc_new().
 *
 * args - array of the arguments the proc was called with; element 0 is
 *        used as the receiver, the remaining elements as the arguments.
 * sym  - the method ID, carried in a VALUE (sym_to_proc() passes
 *        (VALUE)SYM2ID(sym)).
 *
 * Raises ArgumentError when args is empty.
 */
static VALUE
sym_call(VALUE args, VALUE sym)
{
    long argc = RARRAY_LEN(args);
    VALUE *argv = RARRAY_PTR(args);

    if (argc < 1) {
        rb_raise(rb_eArgError, "no receiver given");
    }
    return rb_funcall3(argv[0], (ID)sym, argc - 1, argv + 1);
}
/*
 *  call-seq:
 *     sym.to_proc
 *
 *  Returns a _Proc_ object that calls the method named by _sym_ on its
 *  first argument, passing any further arguments along.
 *
 *     (1..3).collect(&:to_s)  #=> ["1", "2", "3"]
 */

static VALUE
sym_to_proc(VALUE sym)
{
    /* The ID travels to sym_call() packed into the proc's VALUE argument. */
    ID mid = SYM2ID(sym);

    return rb_proc_new(sym_call, (VALUE)mid);
}
/***********************************************************************
*
@ -1473,47 +1329,6 @@ rb_class_superclass(VALUE klass)
return super;
}
/*
 * Interns the contents of a String and returns the resulting ID.
 *
 * Raises ArgumentError when the string has no buffer or is empty, and
 * when its recorded length differs from its C-string length (i.e. it
 * contains an embedded NUL byte).
 */
static ID
str_to_id(VALUE str)
{
    const char *ptr = RSTRING_PTR(str);
    long len = RSTRING_LEN(str);

    if (!ptr || len == 0) {
        rb_raise(rb_eArgError, "empty symbol string");
    }
    if (len != strlen(ptr)) {
        rb_raise(rb_eArgError, "Symbols should not contain NUL (\\0)");
    }
    return rb_intern(ptr);
}
/*
 * Converts an arbitrary object to an ID.
 *
 *  - String: interned through str_to_id().
 *  - Symbol: converted with SYM2ID.
 *  - Fixnum: accepted with a warning, provided rb_id2name() knows the
 *    number; otherwise ArgumentError is raised.
 *  - anything else: tried through rb_check_string_type(); if that gives
 *    nil, TypeError is raised.
 */
ID
rb_to_id(VALUE name)
{
    int type = TYPE(name);

    if (type == T_STRING) {
        return str_to_id(name);
    }
    if (type == T_SYMBOL) {
        return SYM2ID(name);
    }
    if (type == T_FIXNUM) {
        ID id;

        rb_warn("do not use Fixnums as Symbols");
        id = FIX2LONG(name);
        if (!rb_id2name(id)) {
            rb_raise(rb_eArgError, "%ld is not a symbol", id);
        }
        return id;
    }
    else {
        VALUE tmp = rb_check_string_type(name);

        if (!NIL_P(tmp)) {
            return str_to_id(tmp);
        }
    }
    /* rb_raise() does not return, so control never leaves this way. */
    rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING_PTR(rb_inspect(name)));
}
/*
* call-seq:
* attr_reader(symbol, ...) => nil
@ -2449,19 +2264,6 @@ Init_Object(void)
rb_undef_method(CLASS_OF(rb_cNilClass), "new");
rb_define_global_const("NIL", Qnil);
rb_cSymbol = rb_define_class("Symbol", rb_cObject);
rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
rb_undef_alloc_func(rb_cSymbol);
rb_undef_method(CLASS_OF(rb_cSymbol), "new");
rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0);
rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0);
rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0);
rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
rb_define_method(rb_cSymbol, "===", rb_obj_equal, 1);
rb_define_method(rb_cModule, "freeze", rb_mod_freeze, 0);
rb_define_method(rb_cModule, "===", rb_mod_eqq, 1);
rb_define_method(rb_cModule, "==", rb_obj_equal, 1);

75
parse.y
Просмотреть файл

@ -8255,15 +8255,26 @@ static const struct {
static struct symbols {
ID last_id;
st_table *tbl;
st_table *rev;
st_table *sym_id;
st_table *id_sym;
} global_symbols = {tLAST_TOKEN};
static struct st_hash_type symhash = {
rb_str_cmp,
rb_str_hash,
};
void
Init_sym(void)
{
global_symbols.tbl = st_init_strtable_with_size(200);
global_symbols.rev = st_init_numtable_with_size(200);
global_symbols.sym_id = st_init_table_with_size(&symhash, 1000);
global_symbols.id_sym = st_init_numtable_with_size(1000);
}
/*
 * Passes the ID -> Symbol table to rb_mark_tbl() during garbage
 * collection, presumably so the Symbol objects registered by
 * rb_intern2() are treated as live.
 *
 * Takes no arguments: the collector invokes it as rb_gc_mark_symbols(),
 * and the former `int lev' parameter was never read.
 */
void
rb_gc_mark_symbols(void)
{
    rb_mark_tbl(global_symbols.id_sym);
}
static ID
@ -8368,16 +8379,17 @@ rb_symname_p(const char *name)
}
ID
rb_intern(const char *name)
rb_intern2(const char *name, long len)
{
const char *m = name;
VALUE sym = rb_str_new(name, len);
ID id;
int last;
if (st_lookup(global_symbols.tbl, (st_data_t)name, (st_data_t *)&id))
if (st_lookup(global_symbols.sym_id, (st_data_t)sym, (st_data_t *)&id))
return id;
last = strlen(name)-1;
last = len-1;
id = 0;
switch (*name) {
case '$':
@ -8438,12 +8450,42 @@ rb_intern(const char *name)
new_id:
id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
id_register:
name = strdup(name);
st_add_direct(global_symbols.tbl, (st_data_t)name, id);
st_add_direct(global_symbols.rev, id, (st_data_t)name);
RBASIC(sym)->klass = rb_cSymbol;
OBJ_FREEZE(sym);
st_add_direct(global_symbols.sym_id, (st_data_t)sym, id);
st_add_direct(global_symbols.id_sym, id, (st_data_t)sym);
return id;
}
/*
 * Interns a NUL-terminated C string: measures it with strlen() and
 * delegates to rb_intern2().
 */
ID
rb_intern(const char *name)
{
    long len = strlen(name);

    return rb_intern2(name, len);
}
/*
 * Returns the Symbol object registered for `id' in global_symbols.id_sym.
 *
 * An entry whose class pointer is still 0 gets rb_cSymbol filled in
 * before it is returned (presumably these are symbols interned before
 * rb_cSymbol was created).
 *
 * Returns Qnil when no symbol is registered for `id'. Without this
 * explicit return, control fell off the end of a non-void function and
 * callers of ID2SYM() received an indeterminate value.
 */
VALUE
rb_id2sym(ID id)
{
    VALUE data;

    if (!st_lookup(global_symbols.id_sym, id, &data)) {
        return Qnil;
    }
    if (!RBASIC(data)->klass) {
        RBASIC(data)->klass = rb_cSymbol;
    }
    return data;
}
/*
 * Returns the ID for a Symbol (or symbol-like string) object.
 *
 * The object is first looked up in global_symbols.sym_id; when it is
 * not found there, its bytes are interned through rb_intern2().
 */
ID
rb_sym2id(VALUE sym)
{
    ID id;

    if (!st_lookup(global_symbols.sym_id, sym, &id)) {
        id = rb_intern2(RSTRING_PTR(sym), RSTRING_LEN(sym));
    }
    return id;
}
const char *
rb_id2name(ID id)
{
@ -8459,8 +8501,8 @@ rb_id2name(ID id)
}
}
if (st_lookup(global_symbols.rev, id, &data))
return (char *)data;
if (st_lookup(global_symbols.id_sym, id, &data))
return RSTRING_PTR(data);
if (is_attrset_id(id)) {
ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL;
@ -8484,9 +8526,9 @@ rb_id2name(ID id)
}
static int
symbols_i(char *key, ID value, VALUE ary)
symbols_i(VALUE sym, ID value, VALUE ary)
{
rb_ary_push(ary, ID2SYM(value));
rb_ary_push(ary, sym);
return ST_CONTINUE;
}
@ -8509,9 +8551,9 @@ symbols_i(char *key, ID value, VALUE ary)
VALUE
rb_sym_all_symbols(void)
{
VALUE ary = rb_ary_new2(global_symbols.tbl->num_entries);
VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries);
st_foreach(global_symbols.tbl, symbols_i, ary);
st_foreach(global_symbols.sym_id, symbols_i, ary);
return ary;
}
@ -9261,3 +9303,4 @@ Init_ripper(void)
rb_intern("&&");
}
#endif /* RIPPER */

10
ruby.h
Просмотреть файл

@ -195,10 +195,11 @@ VALUE rb_ull2inum(unsigned LONG_LONG);
#define IMMEDIATE_MASK 0x03
#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK)
#define SYMBOL_FLAG 0x0e
#define SYMBOL_P(x) (((VALUE)(x)&0xff)==SYMBOL_FLAG)
#define ID2SYM(x) ((VALUE)(((long)(x))<<8|SYMBOL_FLAG))
#define SYM2ID(x) RSHIFT((VALUE)x,8)
#define SYMBOL_P(x) (!IMMEDIATE_P(x) && RBASIC(x)->klass == rb_cSymbol)
VALUE rb_id2sym(ID);
ID rb_sym2id(VALUE);
#define ID2SYM(x) rb_id2sym(x)
#define SYM2ID(x) rb_sym2id(x)
/* special constants - i.e. non-zero and non-fixnum constants */
#define Qfalse ((VALUE)0)
@ -596,6 +597,7 @@ void rb_gc_register_address(VALUE*);
void rb_gc_unregister_address(VALUE*);
ID rb_intern(const char*);
ID rb_intern2(const char*, long);
const char *rb_id2name(ID);
ID rb_to_id(VALUE);

216
string.c
Просмотреть файл

@ -26,6 +26,7 @@
#endif
VALUE rb_cString;
VALUE rb_cSymbol;
#define STR_TMPLOCK FL_USER7
#define STR_NOEMBED FL_USER1
@ -134,6 +135,7 @@ str_new(VALUE klass, const char *ptr, long len)
rb_raise(rb_eArgError, "negative string size (or size too big)");
}
if (klass == rb_cSymbol) klass = rb_cString;
str = str_alloc(klass);
if (len > RSTRING_EMBED_LEN_MAX) {
RSTRING(str)->as.heap.aux.capa = len;
@ -4369,6 +4371,207 @@ rb_str_setter(VALUE val, ID id, VALUE *var)
}
/**********************************************************************
* Document-class: Symbol
*
* <code>Symbol</code> objects represent names and some strings
* inside the Ruby
* interpreter. They are generated using the <code>:name</code> and
* <code>:"string"</code> literals
* syntax, and by the various <code>to_sym</code> methods. The same
* <code>Symbol</code> object will be created for a given name or string
* for the duration of a program's execution, regardless of the context
* or meaning of that name. Thus if <code>Fred</code> is a constant in
* one context, a method in another, and a class in a third, the
* <code>Symbol</code> <code>:Fred</code> will be the same object in
* all three contexts.
*
* module One
* class Fred
* end
* $f1 = :Fred
* end
* module Two
* Fred = 1
* $f2 = :Fred
* end
* def Fred()
* end
* $f3 = :Fred
* $f1.id #=> 2514190
* $f2.id #=> 2514190
* $f3.id #=> 2514190
*
*/
/*
 *  call-seq:
 *     Symbol.new(str)     => new_sym
 *     Symbol.intern(str)  => new_sym
 *
 *  Returns the symbol corresponding to <i>str</i>. An argument that is
 *  already a Symbol is returned unchanged; anything else is converted
 *  with StringValue() and interned.
 */

static VALUE
rb_sym_s_intern(VALUE klass, VALUE s)
{
    /* Registered as a singleton method of arity 1, so the receiver (the
     * Symbol class) arrives first and the caller's argument second. */
    if (rb_class_real(CLASS_OF(s)) == rb_cSymbol) {
        return s;
    }
    StringValue(s);
    /* rb_intern2() yields an ID; ID2SYM turns it back into the Symbol. */
    return ID2SYM(rb_intern2(RSTRING_PTR(s), RSTRING_LEN(s)));
}
/*
 *  call-seq:
 *     sym.to_i      => fixnum
 *
 *  Converts <i>sym</i> to its ID with SYM2ID and returns that ID as a
 *  Fixnum. The same symbol yields the same integer for the whole run of
 *  a program.
 *
 *     :fred.to_i           #=> 9809
 *     "fred".to_sym.to_i   #=> 9809
 */

static VALUE
sym_to_i(VALUE sym)
{
    return LONG2FIX(SYM2ID(sym));
}
/*
 *  call-seq:
 *     sym.inspect    => string
 *
 *  Returns <i>sym</i> written as a symbol literal: a colon followed by
 *  the symbol's bytes. When rb_symname_p() rejects the name, the text is
 *  passed through rb_str_dump() and emitted in the <code>:"..."</code>
 *  form instead.
 *
 *     :fred.inspect   #=> ":fred"
 */

static VALUE
sym_inspect(VALUE sym)
{
    long len = RSTRING_LEN(sym);
    VALUE result = rb_str_new(0, len + 1);
    char *buf = RSTRING_PTR(result);

    buf[0] = ':';
    memcpy(buf + 1, RSTRING_PTR(sym), len);

    if (rb_symname_p(RSTRING_PTR(sym))) {
        return result;
    }

    /* Presumably rb_str_dump() returns a quoted, escaped copy that starts
     * with "\":"; its first two bytes are overwritten to read :" instead. */
    result = rb_str_dump(result);
    buf = RSTRING_PTR(result);
    buf[0] = ':';
    buf[1] = '"';
    return result;
}
/*
 *  call-seq:
 *     sym.id2name   => string
 *     sym.to_s      => string
 *
 *  Returns a new string built with rb_str_new() from the bytes of
 *  <i>sym</i>.
 *
 *     :fred.id2name   #=> "fred"
 */

static VALUE
sym_to_s(VALUE sym)
{
    const char *ptr = RSTRING_PTR(sym);
    long len = RSTRING_LEN(sym);

    return rb_str_new(ptr, len);
}
/*
* call-seq:
* sym.to_sym => sym
* sym.intern => sym
*
* Returns <i>sym</i> itself. Elsewhere <code>to_sym</code> yields the
* <code>Symbol</code> corresponding to an object; the receiver here is
* already a symbol, so it is handed back without any conversion.
*/
static VALUE
sym_to_sym(VALUE sym)
{
return sym;
}
/*
 * Block body installed by sym_to_proc() through rb_proc_new().
 *
 * args - array of the arguments the proc was called with; element 0 is
 *        used as the receiver, the remaining elements as the arguments.
 * sym  - the method ID, carried in a VALUE (sym_to_proc() passes
 *        (VALUE)SYM2ID(sym)).
 *
 * Raises ArgumentError when args is empty.
 */
static VALUE
sym_call(VALUE args, VALUE sym)
{
    long argc = RARRAY_LEN(args);
    VALUE *argv = RARRAY_PTR(args);

    if (argc < 1) {
        rb_raise(rb_eArgError, "no receiver given");
    }
    return rb_funcall3(argv[0], (ID)sym, argc - 1, argv + 1);
}
/*
 *  call-seq:
 *     sym.to_proc
 *
 *  Returns a _Proc_ object that calls the method named by _sym_ on its
 *  first argument, passing any further arguments along.
 *
 *     (1..3).collect(&:to_s)  #=> ["1", "2", "3"]
 */

static VALUE
sym_to_proc(VALUE sym)
{
    /* The ID travels to sym_call() packed into the proc's VALUE argument. */
    ID mid = SYM2ID(sym);

    return rb_proc_new(sym_call, (VALUE)mid);
}
/*
 * Returns the ID for a String or Symbol object.
 *
 * Raises ArgumentError when the string has no buffer or is empty.
 * A Symbol is converted with SYM2ID; returning the object itself would
 * hand the caller an object reference where an ID is expected. Any
 * other string is interned with its explicit length via rb_intern2().
 */
static ID
str_to_id(VALUE str)
{
    if (!RSTRING_PTR(str) || RSTRING_LEN(str) == 0) {
        rb_raise(rb_eArgError, "empty symbol string");
    }
    if (RBASIC(str)->klass == rb_cSymbol) {
        return SYM2ID(str);
    }
    return rb_intern2(RSTRING_PTR(str), RSTRING_LEN(str));
}
/*
 * Converts an arbitrary object to an ID.
 *
 *  - T_STRING objects: handled by str_to_id().
 *  - T_SYMBOL objects: converted with SYM2ID.
 *  - Fixnum: accepted with a warning, provided rb_id2name() knows the
 *    number; otherwise ArgumentError is raised.
 *  - anything else: tried through rb_check_string_type(); if that gives
 *    nil, TypeError is raised.
 */
ID
rb_to_id(VALUE name)
{
    int type = TYPE(name);

    if (type == T_STRING) {
        return str_to_id(name);
    }
    if (type == T_SYMBOL) {
        return SYM2ID(name);
    }
    if (type == T_FIXNUM) {
        ID id;

        rb_warn("do not use Fixnums as Symbols");
        id = FIX2LONG(name);
        if (!rb_id2name(id)) {
            rb_raise(rb_eArgError, "%ld is not a symbol", id);
        }
        return id;
    }
    else {
        VALUE tmp = rb_check_string_type(name);

        if (!NIL_P(tmp)) {
            return str_to_id(tmp);
        }
    }
    /* rb_raise() does not return, so control never leaves this way. */
    rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING_PTR(rb_inspect(name)));
}
/*
* A <code>String</code> object holds and manipulates an arbitrary sequence of
* bytes, typically representing characters. String objects may be created
@ -4496,4 +4699,17 @@ Init_String(void)
rb_fs = Qnil;
rb_define_variable("$;", &rb_fs);
rb_define_variable("$-F", &rb_fs);
rb_cSymbol = rb_define_class("Symbol", rb_cString);
rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
rb_define_singleton_method(rb_cSymbol, "intern", rb_sym_s_intern, 1);
rb_define_singleton_method(rb_cSymbol, "new", rb_sym_s_intern, 1);
rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0);
rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0);
rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0);
rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
}