* common.mk (encdb.h): give output file name to make_encdb.rb.

* encoding.c (enc_table): simplified.

* encoding.c (enc_register_at): lazy loading.  [ruby-dev:33013]

* regenc.h (ENC_DUMMY): added.

* enc/make_encdb.rb: now emits macros only.

* enc/iso_2022_jp.h: split from encoding.c.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15086 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2008-01-17 14:56:22 +00:00
Родитель a0029e3adc
Коммит 0052259d5e
6 изменённых файлов: 306 добавлений и 176 удалений

Просмотреть файл

@ -1,3 +1,17 @@
Thu Jan 17 23:56:20 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* common.mk (encdb.h): give output file name to make_encdb.rb.
* encoding.c (enc_table): simplified.
* encoding.c (enc_register_at): lazy loading. [ruby-dev:33013]
* regenc.h (ENC_DUMMY): added.
* enc/make_encdb.rb: now emits macros only.
* enc/iso_2022_jp.h: split from encoding.c.
Thu Jan 17 21:48:21 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* re.c (rb_char_to_option_kcode): fixed typo.

Просмотреть файл

@ -704,7 +704,7 @@ node_name.inc: {$(VPATH)}node.h
$(BASERUBY) -n $(srcdir)/tool/node_name.rb $? > $@ $(BASERUBY) -n $(srcdir)/tool/node_name.rb $? > $@
encdb.h: $(srcdir)/enc/make_encdb.rb encdb.h: $(srcdir)/enc/make_encdb.rb
$(BASERUBY) $(srcdir)/enc/make_encdb.rb $(srcdir)/enc $(BASERUBY) $(srcdir)/enc/make_encdb.rb $(srcdir)/enc $@
miniprelude.c: $(srcdir)/tool/compile_prelude.rb $(srcdir)/prelude.rb miniprelude.c: $(srcdir)/tool/compile_prelude.rb $(srcdir)/prelude.rb
$(BASERUBY) -I$(srcdir) $(srcdir)/tool/compile_prelude.rb $(srcdir)/prelude.rb $@ $(BASERUBY) -I$(srcdir) $(srcdir)/tool/compile_prelude.rb $(srcdir)/prelude.rb $@

6
enc/iso_2022_jp.h Normal file
Просмотреть файл

@ -0,0 +1,6 @@
#include "regenc.h"
/* dummy for unsupported, stateful encoding */
ENC_DUMMY("ISO-2022-JP");
ENC_ALIAS("ISO2022-JP", "ISO-2022-JP");
ENC_REPLICATE("ISO-2022-JP-2", "ISO-2022-JP");
ENC_ALIAS("ISO2022-JP2", "ISO-2022-JP-2");

Просмотреть файл

@ -15,56 +15,59 @@ def check_duplication(encs, name, fn, line)
end end
end end
count = 0
lines = []
encodings = [] encodings = []
replicas = {}
aliases = {}
encdir = ARGV[0] encdir = ARGV[0]
Dir.open(encdir) {|d| d.grep(/.+\.c\z/)}.sort.each do |fn| outhdr = ARGV[1] || 'encdb.h'
Dir.open(encdir) {|d| d.grep(/.+\.[ch]\z/)}.sort.each do |fn|
open(File.join(encdir,fn)) do |f| open(File.join(encdir,fn)) do |f|
orig = nil orig = nil
name = nil name = nil
encs = [] encs = []
f.each_line do |line| f.each_line do |line|
break if /^OnigEncodingDefine/o =~ line if (/^OnigEncodingDefine/ =~ line)..(/"(.*?)"/ =~ line)
end if $1
f.each_line do |line| check_duplication(encs, $1, fn, $.)
break if /"(.*?)"/ =~ line encs << $1.upcase
end encodings << $1
if $1 count += 1
check_duplication(encs, $1, fn, $.) end
encs << $1.upcase else
encodings << $1 case line
f.each_line do |line| when /^\s*rb_enc_register\(\s*"([^"]+)"/
if /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line count += 1
raise ArgumentError, line = nil
'%s:%d: ENC_REPLICATE: %s is not defined yet. (replica %s)' % when /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/
[fn, $., $2, $1] unless encs.include?($2.upcase) raise ArgumentError,
check_duplication(encs, $1, fn, $.) '%s:%d: ENC_REPLICATE: %s is not defined yet. (replica %s)' %
encs << $1.upcase [fn, $., $2, $1] unless encs.include?($2.upcase)
encodings << $1 count += 1
replicas[$1] = $2 when /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/
elsif /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line raise ArgumentError,
raise ArgumentError, '%s:%d: ENC_ALIAS: %s is not defined yet. (alias %s)' %
'%s:%d: ENC_ALIAS: %s is not defined yet. (alias %s)' % [fn, $., $2, $1] unless encs.include?($2.upcase)
[fn, $., $2, $1] unless encs.include?($2.upcase) when /^ENC_DUMMY\(\s*"([^"]+)"/
check_duplication(encs, $1, fn, $.) count += 1
encodings << $1 else
aliases[$1] = $2 next
end end
check_duplication(encs, $1, fn, $.)
encs << $1.upcase
lines << line.sub(/;.*/m, ";\n") if line
end end
end end
end end
end end
open('encdb.h', 'wb') do |f| result = encodings.map {|e| %[ENC_DEFINE("#{e}");\n]}.join + lines.join +
f.puts 'static const char *const enc_name_list[] = {' "\n#define ENCODING_COUNT #{count}\n"
encodings.each {|name| f.puts' "%s",' % name} mode = IO::RDWR|IO::CREAT
f.puts('};', '', 'static void', 'enc_init_db(void)', '{') mode |= IO::BINARY if defined?(IO::BINARY)
replicas.each_pair {|name, orig| open(outhdr, mode) do |f|
f.puts ' ENC_REPLICATE("%s", "%s");' % [name, orig] unless f.read == result
} f.rewind
aliases.each_pair {|name, orig| f.truncate(0)
f.puts ' ENC_ALIAS("%s", "%s");' % [name, orig] f.print result
} end
f.puts '}'
end end

Просмотреть файл

@ -29,35 +29,62 @@ static struct {
struct rb_encoding_entry *list; struct rb_encoding_entry *list;
int count; int count;
int size; int size;
st_table *alias; st_table *names;
st_table *replica_name;
st_table *alias_name;
} enc_table; } enc_table;
void rb_enc_init(void);
#undef ENC_REPLICATE #undef ENC_REPLICATE
#undef ENC_ALIAS #undef ENC_ALIAS
#define ENC_REPLICATE(name, orig) st_insert(enc_table.replica_name, (st_data_t)(name), (st_data_t)(orig)) #undef ENC_DUMMY
#define ENC_ALIAS(name, orig) st_insert(enc_table.alias_name, (st_data_t)(name), (st_data_t)(orig)) static int encdb_replicate(const char *alias, const char *orig);
#define enc_name_list_size (sizeof(enc_name_list)/sizeof(enc_name_list[0])) static int encdb_alias(const char *alias, const char *orig);
static int encdb_dummy(const char *name);
static void enc_declare(const char *name);
#define ENC_REPLICATE(name, orig) encdb_replicate(name, orig)
#define ENC_ALIAS(name, orig) encdb_alias(name, orig)
#define ENC_DUMMY(name) encdb_dummy(name)
#define ENC_DEFINE(name) enc_declare(name)
static void
enc_init_db(void)
{
#include "encdb.h" #include "encdb.h"
}
#define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
#define ENC_UNINITIALIZED (&rb_cEncoding) #define ENC_UNINITIALIZED (&rb_cEncoding)
#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding) #define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data) #define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
#define ENC_DUMMY FL_USER2 #define ENC_DUMMY_FLAG FL_USER2
#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY) #define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
static int load_encoding(const char *name);
static VALUE enc_base_encoding(VALUE self);
static void static void
enc_mark(void *ptr) enc_mark(void *ptr)
{ {
} }
static void
enc_free(void *ptr)
{
rb_encoding *enc = ptr;
struct rb_encoding_entry *ent = &enc_table.list[enc->ruby_encoding_index];
xfree((char *)ent->name);
ent->name = 0;
ent->enc = 0;
xfree(ptr);
}
static VALUE static VALUE
enc_new(rb_encoding *encoding) enc_new(rb_encoding *encoding)
{ {
VALUE enc = Data_Wrap_Struct(rb_cEncoding, enc_mark, -1, encoding); VALUE enc = Data_Wrap_Struct(rb_cEncoding, enc_mark, enc_free, encoding);
encoding->auxiliary_data = (void *)enc; encoding->auxiliary_data = (void *)enc;
return enc; return enc;
} }
@ -75,13 +102,19 @@ static int
enc_check_encoding(VALUE obj) enc_check_encoding(VALUE obj)
{ {
int index; int index;
rb_encoding *enc;
if (SPECIAL_CONST_P(obj) || BUILTIN_TYPE(obj) != T_DATA || if (SPECIAL_CONST_P(obj) || BUILTIN_TYPE(obj) != T_DATA ||
RDATA(obj)->dmark != enc_mark) { RDATA(obj)->dmark != enc_mark) {
return -1; return -1;
} }
index = rb_enc_to_index((rb_encoding*)RDATA(obj)->data); enc = (rb_encoding*)RDATA(obj)->data;
if (rb_enc_from_index(index) != RDATA(obj)->data) index = rb_enc_to_index(enc);
if (rb_enc_from_index(index) != enc)
return -1; return -1;
if (enc_autoload_p(enc)) {
index = load_encoding(enc->name);
}
return index; return index;
} }
@ -119,7 +152,7 @@ void
rb_gc_mark_encodings(void) rb_gc_mark_encodings(void)
{ {
int i; int i;
for (i = 0; i < enc_table.size; ++i) { for (i = 0; i < enc_table.count; ++i) {
rb_encoding *enc = enc_table.list[i].enc; rb_encoding *enc = enc_table.list[i].enc;
if (enc && enc_initialized_p(enc)) { if (enc && enc_initialized_p(enc)) {
rb_gc_mark(ENC_FROM_ENCODING(enc)); rb_gc_mark(ENC_FROM_ENCODING(enc));
@ -131,29 +164,50 @@ static int
enc_table_expand(int newsize) enc_table_expand(int newsize)
{ {
struct rb_encoding_entry *ent; struct rb_encoding_entry *ent;
int count = newsize;
if (enc_table.size >= newsize) return newsize; if (enc_table.size >= newsize) return newsize;
newsize = (newsize + 7) / 8 * 8;
ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize); ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
if (!ent) return -1; if (!ent) return -1;
memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size)); memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
enc_table.list = ent; enc_table.list = ent;
enc_table.size = newsize; enc_table.size = newsize;
return newsize; return count;
} }
static int static int
enc_register_at(int index, const char *name, rb_encoding *encoding) enc_register_at(int index, const char *name, rb_encoding *encoding)
{ {
struct rb_encoding_entry *ent = &enc_table.list[index]; struct rb_encoding_entry *ent = &enc_table.list[index];
void *obj = ENC_UNINITIALIZED;
name = strdup(name); if (!ent->name) {
ent->name = name; ent->name = name = strdup(name);
if (!ent->enc) ent->enc = malloc(sizeof(rb_encoding)); }
*ent->enc = *encoding; else if (STRCASECMP(name, ent->name)) {
return -1;
}
if (!ent->enc) {
ent->enc = malloc(sizeof(rb_encoding));
}
else {
obj = ent->enc->auxiliary_data;
}
if (encoding) {
*ent->enc = *encoding;
}
else {
memset(ent->enc, 0, sizeof(*ent->enc));
}
encoding = ent->enc; encoding = ent->enc;
encoding->name = name; encoding->name = name;
encoding->ruby_encoding_index = index; encoding->ruby_encoding_index = index;
if (rb_cEncoding) { st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
if (obj != ENC_UNINITIALIZED) {
encoding->auxiliary_data = obj;
}
else if (rb_cEncoding) {
/* initialize encoding data */ /* initialize encoding data */
enc_new(encoding); enc_new(encoding);
} }
@ -168,7 +222,6 @@ enc_register(const char *name, rb_encoding *encoding)
{ {
int index = enc_table.count; int index = enc_table.count;
if (index >= ENCODING_INLINE_MAX) index = enc_table.size;
if ((index = enc_table_expand(index + 1)) < 0) return -1; if ((index = enc_table_expand(index + 1)) < 0) return -1;
enc_table.count = index; enc_table.count = index;
return enc_register_at(index - 1, name, encoding); return enc_register_at(index - 1, name, encoding);
@ -185,12 +238,11 @@ rb_enc_register(const char *name, rb_encoding *encoding)
if (index >= 0) { if (index >= 0) {
rb_encoding *oldenc = rb_enc_from_index(index); rb_encoding *oldenc = rb_enc_from_index(index);
if (STRCASECMP(name, rb_enc_name(oldenc))) { if (STRCASECMP(name, rb_enc_name(oldenc))) {
st_data_t key = (st_data_t)name, alias;
st_delete(enc_table.alias, &key, &alias);
index = enc_register(name, encoding); index = enc_register(name, encoding);
} }
else if (enc_initialized_p(oldenc) && else if (!enc_autoload_p(oldenc) ||
!ENC_DUMMY_P(ENC_FROM_ENCODING(oldenc))) { (enc_initialized_p(oldenc) &&
!ENC_DUMMY_P(ENC_FROM_ENCODING(oldenc)))) {
enc_register_at(index, name, encoding); enc_register_at(index, name, encoding);
} }
else { else {
@ -199,11 +251,21 @@ rb_enc_register(const char *name, rb_encoding *encoding)
} }
else { else {
index = enc_register(name, encoding); index = enc_register(name, encoding);
set_encoding_const(name, rb_enc_from_index(index));
} }
set_encoding_const(name, rb_enc_from_index(index));
return index; return index;
} }
static void
enc_declare(const char *name)
{
int idx = rb_enc_registered(name);
if (idx < 0) {
idx = enc_register(name, 0);
}
set_encoding_const(name, rb_enc_from_index(idx));
}
static void static void
enc_check_duplication(const char *name) enc_check_duplication(const char *name)
{ {
@ -218,36 +280,68 @@ set_base_encoding(int index, rb_encoding *base)
VALUE enc = rb_enc_from_encoding(enc_table.list[index].enc); VALUE enc = rb_enc_from_encoding(enc_table.list[index].enc);
rb_ivar_set(enc, id_base_encoding, rb_enc_from_encoding(base)); rb_ivar_set(enc, id_base_encoding, rb_enc_from_encoding(base));
if (rb_enc_dummy_p(base)) FL_SET(enc, ENC_DUMMY); if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
return enc; return enc;
} }
int int
rb_enc_replicate(const char *name, rb_encoding *encoding) rb_enc_replicate(const char *name, rb_encoding *encoding)
{ {
int index = enc_table.size; int idx;
enc_check_duplication(name); enc_check_duplication(name);
if (enc_table_expand(index + 1) < 0) return -1; idx = enc_register(name, encoding);
enc_register_at(index, name, encoding); set_base_encoding(idx, encoding);
set_base_encoding(index, encoding); set_encoding_const(name, rb_enc_from_index(idx));
return index; return idx;
}
static int
enc_replicate(int idx, const char *name, rb_encoding *origenc)
{
if (idx < 0) {
idx = enc_register(name, origenc);
}
else {
idx = enc_register_at(idx, name, origenc);
}
if (idx >= 0) {
set_base_encoding(idx, origenc);
set_encoding_const(name, rb_enc_from_index(idx));
}
return idx;
}
static int
encdb_replicate(const char *name, const char *orig)
{
int origidx = rb_enc_registered(orig);
int idx = rb_enc_registered(name);
if (origidx < 0) {
origidx = enc_register(orig, 0);
}
return enc_replicate(idx, name, rb_enc_from_index(origidx));
} }
int int
rb_define_dummy_encoding(const char *name) rb_define_dummy_encoding(const char *name)
{ {
int index = enc_table.size; int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
rb_encoding *encoding; VALUE enc = rb_enc_from_encoding(enc_table.list[index].enc);
VALUE enc;
enc_check_duplication(name); ENC_SET_DUMMY(enc);
if (index < ENCODING_INLINE_MAX) index = ENCODING_INLINE_MAX; return index;
if (enc_table_expand(index + 1) < 0) return -1; }
encoding = rb_ascii8bit_encoding();
enc_register_at(index, name, encoding); static int
enc = set_base_encoding(index, encoding); encdb_dummy(const char *name)
FL_SET(enc, ENC_DUMMY); {
int index = enc_replicate(rb_enc_registered(name), name,
rb_ascii8bit_encoding());
VALUE enc = rb_enc_from_encoding(enc_table.list[index].enc);
ENC_SET_DUMMY(enc);
return index; return index;
} }
@ -278,28 +372,38 @@ enc_dummy_p(VALUE enc)
} }
static int static int
enc_alias(const char *alias, const char *orig) enc_alias(const char *alias, int idx)
{ {
st_data_t data; alias = strdup(alias);
int idx; st_insert(enc_table.names, (st_data_t)alias, (st_data_t)idx);
set_encoding_const(alias, rb_enc_from_index(idx));
if (!enc_table.alias) {
enc_table.alias = st_init_strcasetable();
}
if ((idx = rb_enc_find_index(orig)) < 0) {
if (!st_lookup(enc_table.alias, (st_data_t)orig, &data))
return -1;
idx = (int)data;
}
st_insert(enc_table.alias, (st_data_t)alias, (st_data_t)idx);
return idx; return idx;
} }
int int
rb_enc_alias(const char *alias, const char *orig) rb_enc_alias(const char *alias, const char *orig)
{ {
int idx;
enc_check_duplication(alias); enc_check_duplication(alias);
return enc_alias(alias, orig); if (!enc_table.list) {
rb_enc_init();
}
if ((idx = rb_enc_find_index(orig)) < 0) {
return -1;
}
return enc_alias(alias, idx);
}
static int
encdb_alias(const char *alias, const char *orig)
{
int idx = rb_enc_registered(orig);
if (idx < 0) {
idx = enc_register(orig, 0);
}
return enc_alias(alias, idx);
} }
enum { enum {
@ -315,12 +419,16 @@ extern rb_encoding OnigEncodingUS_ASCII;
void void
rb_enc_init(void) rb_enc_init(void)
{ {
enc_table.count = enc_table_expand(ENCINDEX_BUILTIN_MAX); enc_table_expand(ENCODING_COUNT + 1);
if (!enc_table.names) {
enc_table.names = st_init_strcasetable();
}
#define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc) #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
ENC_REGISTER(ASCII); ENC_REGISTER(ASCII);
ENC_REGISTER(UTF_8); ENC_REGISTER(UTF_8);
ENC_REGISTER(US_ASCII); ENC_REGISTER(US_ASCII);
#undef ENC_REGISTER #undef ENC_REGISTER
enc_table.count = ENCINDEX_BUILTIN_MAX;
} }
rb_encoding * rb_encoding *
@ -329,7 +437,7 @@ rb_enc_from_index(int index)
if (!enc_table.list) { if (!enc_table.list) {
rb_enc_init(); rb_enc_init();
} }
if (index < 0 || enc_table.size <= index) { if (index < 0 || enc_table.count <= index) {
return 0; return 0;
} }
return enc_table.list[index].enc; return enc_table.list[index].enc;
@ -338,26 +446,12 @@ rb_enc_from_index(int index)
int int
rb_enc_registered(const char *name) rb_enc_registered(const char *name)
{ {
int i; st_data_t idx = 0;
st_data_t alias = 0;
if (!name) return -1; if (!name) return -1;
if (!enc_table.list) { if (!enc_table.list) return -1;
rb_enc_init(); if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
} return (int)idx;
for (i=0; i<enc_table.size; i++) {
if (!enc_table.list[i].name) {
if (i < ENCODING_INLINE_MAX - 1) i = ENCODING_INLINE_MAX - 1;
continue;
}
if (STRCASECMP(name, enc_table.list[i].name) == 0) {
return i;
}
}
if (!alias && enc_table.alias) {
if (st_lookup(enc_table.alias, (st_data_t)name, &alias)) {
return (int)alias;
}
} }
return -1; return -1;
} }
@ -368,39 +462,53 @@ require_enc(VALUE enclib)
return rb_require_safe(enclib, rb_safe_level()); return rb_require_safe(enclib, rb_safe_level());
} }
static int
load_encoding(const char *name)
{
VALUE enclib = rb_sprintf("enc/%s", name);
VALUE verbose = ruby_verbose;
VALUE debug = ruby_debug;
char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib);
while (s < e) {
if (!ISALNUM(*s)) *s = '_';
else if (ISUPPER(*s)) *s = TOLOWER(*s);
++s;
}
OBJ_FREEZE(enclib);
ruby_verbose = Qfalse;
ruby_debug = Qfalse;
rb_protect(require_enc, enclib, 0);
ruby_verbose = verbose;
ruby_debug = debug;
rb_set_errinfo(Qnil);
return rb_enc_registered(name);
}
int int
rb_enc_find_index(const char *name) rb_enc_find_index(const char *name)
{ {
int i = rb_enc_registered(name); int i = rb_enc_registered(name), b;
rb_encoding *enc;
VALUE base;
if (i < 0) { if (i < 0) {
VALUE enclib = rb_sprintf("enc/%s", name); i = load_encoding(name);
char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib);
while (s < e) {
if (!ISALNUM(*s)) *s = '_';
else if (ISUPPER(*s)) *s = TOLOWER(*s);
++s;
}
OBJ_FREEZE(enclib);
if (RTEST(rb_protect(require_enc, enclib, 0)))
i = rb_enc_registered(name);
rb_set_errinfo(Qnil);
} }
if (i < 0) { else if (enc_autoload_p(enc = rb_enc_from_index(i))) {
st_data_t key = (st_data_t)name, orig; if (enc_initialized_p(enc) &&
if (st_lookup(enc_table.replica_name, key, &orig)) { (base = enc_base_encoding(ENC_FROM_ENCODING(enc)), !NIL_P(base))) {
i = rb_enc_find_index((char *)orig); if ((b = enc_check_encoding(base)) < 0) {
if (i < 0) { st_data_t key, val;
rb_raise(rb_eRuntimeError, "unknown original encoding name - '%s' for replica '%s'", (char *)orig, name); key = (st_data_t)name;
if (st_delete(enc_table.names, &key, &val)) {
if (enc->name != (char *)key) xfree((char *)key);
}
return -1;
} }
i = rb_enc_replicate(name, rb_enc_from_index(i)); enc_register_at(i, name, rb_enc_from_index(b));
st_delete(enc_table.replica_name, &key, &orig);
} }
else if (st_lookup(enc_table.alias_name, key, &orig)) { else {
i = rb_enc_alias(name, (char *)orig); i = load_encoding(name);
if (i < 0) {
rb_raise(rb_eRuntimeError, "unknown original encoding name - '%s' for alias '%s'", (char *)orig, name);
}
st_delete(enc_table.alias_name, &key, &orig);
} }
} }
return i; return i;
@ -777,9 +885,9 @@ enc_base_encoding(VALUE self)
static VALUE static VALUE
enc_list(VALUE klass) enc_list(VALUE klass)
{ {
VALUE ary = rb_ary_new2(enc_table.size); VALUE ary = rb_ary_new2(enc_table.count);
int i; int i;
for (i = 0; i < enc_table.size; ++i) { for (i = 0; i < enc_table.count; ++i) {
rb_encoding *enc = enc_table.list[i].enc; rb_encoding *enc = enc_table.list[i].enc;
if (enc) { if (enc) {
rb_ary_push(ary, rb_enc_from_encoding(enc)); rb_ary_push(ary, rb_enc_from_encoding(enc));
@ -963,9 +1071,11 @@ set_encoding_const(const char *name, rb_encoding *enc)
{ {
VALUE encoding = rb_enc_from_encoding(enc); VALUE encoding = rb_enc_from_encoding(enc);
char *s = (char *)name; char *s = (char *)name;
int haslower = 0, valid = 0; int haslower = 0, hasupper = 0, valid = 0;
if (ISDIGIT(*s)) return;
if (ISUPPER(*s)) { if (ISUPPER(*s)) {
hasupper = 1;
while (*++s && (ISALNUM(*s) || *s == '_')) { while (*++s && (ISALNUM(*s) || *s == '_')) {
if (ISLOWER(*s)) haslower = 1; if (ISLOWER(*s)) haslower = 1;
} }
@ -976,9 +1086,11 @@ set_encoding_const(const char *name, rb_encoding *enc)
} }
if (!valid || haslower) { if (!valid || haslower) {
int len = strlen(name) + 1; int len = strlen(name) + 1;
if (!haslower) { if (!haslower || !hasupper) {
while (!ISLOWER(*s) && *++s); do {
if (*s) haslower = 1; if (ISLOWER(*s)) haslower = 1;
if (ISUPPER(*s)) hasupper = 1;
} while (*++s && (!haslower || !hasupper));
} }
MEMCPY(s = ALLOCA_N(char, len), name, char, len); MEMCPY(s = ALLOCA_N(char, len), name, char, len);
name = s; name = s;
@ -987,7 +1099,9 @@ set_encoding_const(const char *name, rb_encoding *enc)
for (; *s; ++s) { for (; *s; ++s) {
if (!ISALNUM(*s)) *s = '_'; if (!ISALNUM(*s)) *s = '_';
} }
rb_define_const(rb_cEncoding, name, encoding); if (hasupper) {
rb_define_const(rb_cEncoding, name, encoding);
}
} }
if (haslower) { if (haslower) {
for (s = (char *)name; *s; ++s) { for (s = (char *)name; *s; ++s) {
@ -999,10 +1113,10 @@ set_encoding_const(const char *name, rb_encoding *enc)
} }
static int static int
set_encoding_alias(st_data_t name, st_data_t orig, st_data_t arg) rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
{ {
rb_encoding *enc = rb_enc_from_index((int)orig); VALUE ary = (VALUE)arg;
set_encoding_const((const char *)name, enc); rb_ary_push(ary, rb_str_new2((char *)name));
return ST_CONTINUE; return ST_CONTINUE;
} }
@ -1025,25 +1139,25 @@ set_encoding_alias(st_data_t name, st_data_t orig, st_data_t arg)
static VALUE static VALUE
rb_enc_name_list(VALUE klass) rb_enc_name_list(VALUE klass)
{ {
VALUE ary = rb_ary_new2(enc_name_list_size); VALUE ary = rb_ary_new2(enc_table.names->num_entries);
int i; st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
for (i = 0; i < enc_name_list_size; i++) {
rb_ary_push(ary, rb_str_new2(enc_name_list[i]));
}
return ary; return ary;
} }
static int static int
rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg) rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
{ {
rb_hash_aset((VALUE)arg, rb_str_new2((char *)name), rb_str_new2(rb_enc_name(rb_enc_from_index((int)orig)))); VALUE *p = (VALUE *)arg;
return 0; VALUE aliases = p[0];
} VALUE ary = p[1];
int idx = (int)orig;
VALUE str = rb_ary_entry(ary, idx);
static int if (NIL_P(str)) {
rb_enc_aliases_str_i(st_data_t name, st_data_t orig, st_data_t arg) str = rb_str_new2(rb_enc_name(rb_enc_from_index(idx)));
{ rb_ary_store(ary, idx, str);
rb_hash_aset((VALUE)arg, rb_str_new2((char *)name), rb_str_new2((char *)orig)); }
rb_hash_aset(aliases, rb_str_new2((char *)name), str);
return 0; return 0;
} }
@ -1062,10 +1176,11 @@ rb_enc_aliases_str_i(st_data_t name, st_data_t orig, st_data_t arg)
static VALUE static VALUE
rb_enc_aliases(VALUE klass) rb_enc_aliases(VALUE klass)
{ {
VALUE aliases = rb_hash_new(); VALUE aliases[2];
if (enc_table.alias) st_foreach(enc_table.alias, rb_enc_aliases_enc_i, (st_data_t)aliases); aliases[0] = rb_hash_new();
st_foreach(enc_table.alias_name, rb_enc_aliases_str_i, (st_data_t)aliases); aliases[1] = rb_ary_new();
return aliases; st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
return aliases[0];
} }
void void
@ -1073,9 +1188,6 @@ Init_Encoding(void)
{ {
id_base_encoding = rb_intern("#base_encoding"); id_base_encoding = rb_intern("#base_encoding");
enc_table.replica_name = st_init_strcasetable();
enc_table.alias_name = st_init_strcasetable();
rb_cEncoding = rb_define_class("Encoding", rb_cObject); rb_cEncoding = rb_define_class("Encoding", rb_cObject);
rb_undef_alloc_func(rb_cEncoding); rb_undef_alloc_func(rb_cEncoding);
rb_define_method(rb_cEncoding, "to_s", enc_name, 0); rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
@ -1096,12 +1208,6 @@ Init_Encoding(void)
rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0); rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
enc_init_db(); enc_init_db();
/* dummy for unsupported, stateful encoding */
rb_define_dummy_encoding("ISO-2022-JP");
rb_enc_alias("ISO2022-JP", "ISO-2022-JP");
rb_define_dummy_encoding("ISO-2022-JP-2");
rb_enc_alias("ISO2022-JP2", "ISO-2022-JP-2");
} }
/* locale insensitive functions */ /* locale insensitive functions */

Просмотреть файл

@ -202,5 +202,6 @@ extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*);
/* macros for define replica encoding and encoding alias */ /* macros for define replica encoding and encoding alias */
#define ENC_REPLICATE(name, orig) #define ENC_REPLICATE(name, orig)
#define ENC_ALIAS(name, orig) #define ENC_ALIAS(name, orig)
#define ENC_DUMMY(name)
#endif /* REGENC_H */ #endif /* REGENC_H */