зеркало из https://github.com/github/ruby.git
* ext/nkf/nkf-utf8/nkf.c: Updated.
* ext/nkf/nkf.c (rb_nkf_enc_get): added. (find encoding or replicate default encoding)
* ext/nkf/nkf.c (NKF::<ENCODING>): redefine encoding constant.
* ext/nkf/lib/kconv.rb (Kconv::<ENCODING>): redefined as Encoding.
* ext/nkf/lib/kconv.rb: refactoring.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14330 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
8acb0fbd4d
Коммит
dec6ba8a22
13
ChangeLog
13
ChangeLog
|
@ -1,3 +1,16 @@
|
|||
Wed Dec 19 19:18:06 2007 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* ext/nkf/nkf-utf8/nkf.c: Updated.
|
||||
|
||||
* ext/nkf/nkf.c (rb_nkf_enc_get): added.
|
||||
(find encoding or replicate default encoding)
|
||||
|
||||
* ext/nkf/nkf.c (NKF::<ENCODING>): redefine encoding constant.
|
||||
|
||||
* ext/nkf/lib/kconv.rb (Kconv::<ENCODING>): redefined as Encoding.
|
||||
|
||||
* ext/nkf/lib/kconv.rb: refactoring.
|
||||
|
||||
Wed Dec 19 19:11:08 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* bignum.c (rb_big_mul0): blocking check for bigger numbers.
|
||||
|
|
|
@ -44,13 +44,11 @@ module Kconv
|
|||
# UNKNOWN
|
||||
UNKNOWN = NKF::UNKNOWN
|
||||
|
||||
#
|
||||
#
|
||||
# Private Constants
|
||||
#
|
||||
|
||||
# Revision of kconv.rb
|
||||
REVISION = %q$Revision$
|
||||
|
||||
#Regexp of Encoding
|
||||
|
||||
# Regexp of Shift_JIS string (private constant)
|
||||
|
@ -83,7 +81,7 @@ module Kconv
|
|||
#
|
||||
|
||||
# call-seq:
|
||||
# Kconv.kconv(str, out_code, in_code = Kconv::AUTO)
|
||||
# Kconv.kconv(str, to_enc, from_enc=nil)
|
||||
#
|
||||
# Convert <code>str</code> to out_code.
|
||||
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
|
||||
|
@ -92,41 +90,10 @@ module Kconv
|
|||
# This method decodes MIME encoded strings and
|
||||
# converts halfwidth katakana to fullwidth katakana.
|
||||
# If you don't want to decode them, use NKF.nkf.
|
||||
def kconv(str, out_code, in_code = AUTO)
|
||||
opt = '-'
|
||||
case in_code
|
||||
when ::NKF::JIS
|
||||
opt << 'J'
|
||||
when ::NKF::EUC
|
||||
opt << 'E'
|
||||
when ::NKF::SJIS
|
||||
opt << 'S'
|
||||
when ::NKF::UTF8
|
||||
opt << 'W'
|
||||
when ::NKF::UTF16
|
||||
opt << 'W16'
|
||||
when ::NKF::UTF32
|
||||
opt << 'W32'
|
||||
end
|
||||
|
||||
case out_code
|
||||
when ::NKF::JIS
|
||||
opt << 'j'
|
||||
when ::NKF::EUC
|
||||
opt << 'e'
|
||||
when ::NKF::SJIS
|
||||
opt << 's'
|
||||
when ::NKF::UTF8
|
||||
opt << 'w'
|
||||
when ::NKF::UTF16
|
||||
opt << 'w16'
|
||||
when ::NKF::UTF32
|
||||
opt << 'w32'
|
||||
when ::NKF::NOCONV
|
||||
return str
|
||||
end
|
||||
|
||||
opt = '' if opt == '-'
|
||||
def kconv(str, to_enc, from_enc=nil)
|
||||
opt = ''
|
||||
opt += ' --ic=' + from_enc.name if from_enc
|
||||
opt += ' --oc=' + to_enc.name if to_enc
|
||||
|
||||
::NKF::nkf(opt, str)
|
||||
end
|
||||
|
@ -146,7 +113,7 @@ module Kconv
|
|||
# convert halfwidth katakana to fullwidth katakana.
|
||||
# If you don't want it, use NKF.nkf('-jxm0', str).
|
||||
def tojis(str)
|
||||
::NKF::nkf('-jm', str).force_encoding("iso-2022-JP")
|
||||
::NKF::nkf('-jm', str)
|
||||
end
|
||||
module_function :tojis
|
||||
|
||||
|
@ -160,7 +127,7 @@ module Kconv
|
|||
# convert halfwidth katakana to fullwidth katakana.
|
||||
# If you don't want it, use NKF.nkf('-exm0', str).
|
||||
def toeuc(str)
|
||||
::NKF::nkf('-em', str).force_encoding("EUC-JP")
|
||||
::NKF::nkf('-em', str)
|
||||
end
|
||||
module_function :toeuc
|
||||
|
||||
|
@ -174,7 +141,7 @@ module Kconv
|
|||
# convert halfwidth katakana to fullwidth katakana.
|
||||
# If you don't want it, use NKF.nkf('-sxm0', str).
|
||||
def tosjis(str)
|
||||
::NKF::nkf('-sm', str).force_encoding("Shift_JIS")
|
||||
::NKF::nkf('-sm', str)
|
||||
end
|
||||
module_function :tosjis
|
||||
|
||||
|
@ -188,7 +155,7 @@ module Kconv
|
|||
# convert halfwidth katakana to fullwidth katakana.
|
||||
# If you don't want it, use NKF.nkf('-wxm0', str).
|
||||
def toutf8(str)
|
||||
::NKF::nkf('-wm', str).force_encoding("UTF-8")
|
||||
::NKF::nkf('-wm', str)
|
||||
end
|
||||
module_function :toutf8
|
||||
|
||||
|
@ -227,21 +194,12 @@ module Kconv
|
|||
# call-seq:
|
||||
# Kconv.guess(str) -> integer
|
||||
#
|
||||
# Guess input encoding by NKF.guess2
|
||||
# Guess input encoding by NKF.guess
|
||||
def guess(str)
|
||||
::NKF::guess(str)
|
||||
end
|
||||
module_function :guess
|
||||
|
||||
# call-seq:
|
||||
# Kconv.guess_old(str) -> integer
|
||||
#
|
||||
# Guess input encoding by NKF.guess1
|
||||
def guess_old(str)
|
||||
::NKF::guess1(str)
|
||||
end
|
||||
module_function :guess_old
|
||||
|
||||
#
|
||||
# isEncoding
|
||||
#
|
||||
|
@ -283,7 +241,7 @@ end
|
|||
|
||||
class String
|
||||
# call-seq:
|
||||
# String#kconv(out_code, in_code = Kconv::AUTO)
|
||||
# String#kconv(to_enc, from_enc)
|
||||
#
|
||||
# Convert <code>self</code> to out_code.
|
||||
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
|
||||
|
@ -292,8 +250,8 @@ class String
|
|||
# This method decodes MIME encoded strings and
|
||||
# converts halfwidth katakana to fullwidth katakana.
|
||||
# If you don't want to decode them, use NKF.nkf.
|
||||
def kconv(out_code, in_code=Kconv::AUTO)
|
||||
Kconv::kconv(self, out_code, in_code)
|
||||
def kconv(to_enc, from_enc=nil)
|
||||
Kconv::kconv(self, to_enc, from_enc)
|
||||
end
|
||||
|
||||
#
|
||||
|
|
|
@ -320,6 +320,7 @@ static const struct {
|
|||
{"ISO-2022-JP-1", ISO_2022_JP_1},
|
||||
{"ISO-2022-JP-3", ISO_2022_JP_3},
|
||||
{"SHIFT_JIS", SHIFT_JIS},
|
||||
{"SJIS", SHIFT_JIS},
|
||||
{"WINDOWS-31J", WINDOWS_31J},
|
||||
{"CSWINDOWS31J", WINDOWS_31J},
|
||||
{"CP932", WINDOWS_31J},
|
||||
|
|
|
@ -61,6 +61,13 @@ rb_nkf_putchar(unsigned int c)
|
|||
#include "nkf-utf8/utf8tbl.c"
|
||||
#include "nkf-utf8/nkf.c"
|
||||
|
||||
rb_encoding* rb_nkf_enc_get(const char *name)
|
||||
{
|
||||
int idx = rb_enc_find_index(name);
|
||||
if (idx < 0) idx = rb_enc_replicate(name, rb_default_encoding());
|
||||
return rb_enc_from_index(idx);
|
||||
}
|
||||
|
||||
int nkf_split_options(const char *arg)
|
||||
{
|
||||
int count = 0;
|
||||
|
@ -126,16 +133,13 @@ int nkf_split_options(const char *arg)
|
|||
static VALUE
|
||||
rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
|
||||
{
|
||||
char *opt_ptr, *opt_end;
|
||||
volatile VALUE v;
|
||||
char *encname;
|
||||
int idx;
|
||||
rb_encoding *to_enc;
|
||||
const char *to_e;
|
||||
int to_encidx;
|
||||
|
||||
reinit();
|
||||
StringValue(opt);
|
||||
opt_ptr = RSTRING_PTR(opt);
|
||||
opt_end = opt_ptr + RSTRING_LEN(opt);
|
||||
nkf_split_options(opt_ptr);
|
||||
nkf_split_options(RSTRING_PTR(opt));
|
||||
|
||||
incsize = INCSIZE;
|
||||
|
||||
|
@ -144,7 +148,6 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
|
|||
input = (unsigned char *)RSTRING_PTR(src);
|
||||
i_len = RSTRING_LEN(src);
|
||||
result = rb_str_new(0, i_len*3 + 10);
|
||||
v = result;
|
||||
|
||||
output_ctr = 0;
|
||||
output = (unsigned char *)RSTRING_PTR(result);
|
||||
|
@ -154,15 +157,9 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
|
|||
kanji_convert(NULL);
|
||||
rb_str_set_len(result, output_ctr);
|
||||
OBJ_INFECT(result, src);
|
||||
encname = nkf_enc_name(output_encoding);
|
||||
fprintf(stderr, "%s\n", encname);
|
||||
idx = rb_enc_find_index(encname);
|
||||
fprintf(stderr, "%d\n", idx);
|
||||
if (idx <= 0) {
|
||||
idx = rb_enc_replicate(encname, rb_enc_find(rb_enc_name(ONIG_ENCODING_ASCII)));
|
||||
fprintf(stderr, "%d\n", idx);
|
||||
}
|
||||
rb_enc_associate_index(result, idx);
|
||||
|
||||
rb_enc_associate(result, rb_nkf_enc_get(nkf_enc_name(output_encoding)));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -178,9 +175,6 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
|
|||
static VALUE
|
||||
rb_nkf_guess(VALUE obj, VALUE src)
|
||||
{
|
||||
char* codename;
|
||||
rb_encoding* enc;
|
||||
|
||||
reinit();
|
||||
|
||||
input_ctr = 0;
|
||||
|
@ -192,13 +186,7 @@ rb_nkf_guess(VALUE obj, VALUE src)
|
|||
kanji_convert( NULL );
|
||||
guess_f = FALSE;
|
||||
|
||||
codename = get_guessed_code();
|
||||
enc = rb_enc_find(codename);
|
||||
if (enc <= 0) {
|
||||
int idx = rb_enc_replicate(codename, rb_enc_find(rb_enc_name(ONIG_ENCODING_ASCII)));
|
||||
enc = rb_enc_from_index(idx);
|
||||
}
|
||||
return rb_enc_from_encoding(enc);
|
||||
return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code()));
|
||||
}
|
||||
|
||||
|
||||
|
@ -483,6 +471,18 @@ Init_nkf()
|
|||
rb_define_module_function(mNKF, "guess", rb_nkf_guess, 1);
|
||||
rb_define_alias(rb_singleton_class(mNKF), "guess", "guess");
|
||||
|
||||
rb_define_const(mNKF, "AUTO", Qnil);
|
||||
rb_define_const(mNKF, "NOCONV", Qnil);
|
||||
rb_define_const(mNKF, "UNKNOWN", Qnil);
|
||||
rb_define_const(mNKF, "BINARY", rb_enc_from_encoding(rb_nkf_enc_get("BINARY")));
|
||||
rb_define_const(mNKF, "ASCII", rb_enc_from_encoding(rb_nkf_enc_get("US_ASCII")));
|
||||
rb_define_const(mNKF, "JIS", rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP")));
|
||||
rb_define_const(mNKF, "EUC", rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP")));
|
||||
rb_define_const(mNKF, "SJIS", rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS")));
|
||||
rb_define_const(mNKF, "UTF8", rb_enc_from_encoding(rb_nkf_enc_get("UTF-8")));
|
||||
rb_define_const(mNKF, "UTF16", rb_enc_from_encoding(rb_nkf_enc_get("UTF-16")));
|
||||
rb_define_const(mNKF, "UTF32", rb_enc_from_encoding(rb_nkf_enc_get("UTF-32")));
|
||||
|
||||
/* Full version string of nkf */
|
||||
rb_define_const(mNKF, "VERSION", rb_str_new2(RUBY_NKF_VERSION));
|
||||
/* Version of nkf */
|
||||
|
|
Загрузка…
Ссылка в новой задаче