зеркало из https://github.com/github/ruby.git
follow to nkf 2.0.4
:new constants NKF::VERSION NKF::ASCII NKF::UTF8 NKF::UTF16 NKF::UTF32 :new methods NKF.guess1 (guess) NKF.guess2 (from nkf2) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7132 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
640fad68cc
Коммит
35b917f3ee
|
@ -3,5 +3,7 @@ depend
|
|||
extconf.rb
|
||||
lib/kconv.rb
|
||||
nkf.c
|
||||
nkf1.7/nkf.c
|
||||
nkf-utf8/config.h
|
||||
nkf-utf8/nkf.c
|
||||
nkf-utf8/utf8tbl.c
|
||||
test.rb
|
||||
|
|
|
@ -1,73 +1,226 @@
|
|||
require 'nkf'
|
||||
|
||||
module Kconv
|
||||
AUTO = NKF::AUTO
|
||||
JIS = NKF::JIS
|
||||
EUC = NKF::EUC
|
||||
SJIS = NKF::SJIS
|
||||
BINARY = NKF::BINARY
|
||||
NOCONV = NKF::NOCONV
|
||||
UNKNOWN = NKF::UNKNOWN
|
||||
#Constant of Encoding
|
||||
AUTO = ::NKF::AUTO
|
||||
JIS = ::NKF::JIS
|
||||
EUC = ::NKF::EUC
|
||||
SJIS = ::NKF::SJIS
|
||||
BINARY = ::NKF::BINARY
|
||||
NOCONV = ::NKF::NOCONV
|
||||
ASCII = ::NKF::ASCII
|
||||
UTF8 = ::NKF::UTF8
|
||||
UTF16 = ::NKF::UTF16
|
||||
UTF32 = ::NKF::UTF32
|
||||
UNKNOWN = ::NKF::UNKNOWN
|
||||
|
||||
#Regexp of Encoding
|
||||
Iconv_Shift_JIS = /\A(?:
|
||||
[\x00-\x7f\xa1-\xdf] |
|
||||
\x81[\x40-\x7e\x80-\xac\xb8-\xbf\xc8-\xce\xda-\xe8\xf0-\xf7\xfc] |
|
||||
\x82[\x4f-\x58\x60-\x79\x81-\x9a\x9f-\xf1] |
|
||||
\x83[\x40-\x7e\x80-\x96\x9f-\xb6\xbf-\xd6\x40-\x60] |
|
||||
\x84[\x40-\x60\x70-\x7e\x80-\x91\x9f-\xbe\x9f-\xfc] |
|
||||
[\x89-\x8f\x90-\x97\x99-\x9f\xe0-\xea][\x40-\x7e] |
|
||||
[\x89-\x97\x99-\x9f\xe0-\xe9][\x80-\xfc] |
|
||||
\x98[\x40-\x72\x9f-\xfc] |
|
||||
\xea[\x80-\xa4]
|
||||
)*\z/nx
|
||||
Iconv_EUC_JP = /\A(?:
|
||||
[\x00-\x7f] |
|
||||
\x8e [\xa1-\xdf] |
|
||||
\x8f [\xa1-\xdf] [\xa1-\xdf] |
|
||||
[\xa1\xb0-\xbce\xd0-\xf3][\xa1-\xfe] |
|
||||
\xa2[\xa1-\xae\xba-\xc1\xca-\xd0\xdc-\xea\xf2-\xf9\xfe] |
|
||||
\xa3[\xb0-\xb9\xc1-\xda\xe1-\xfa] |
|
||||
\xa4[\xa1-\xf3] |
|
||||
\xa5[\xa1-\xf6] |
|
||||
\xa6[\xa1-\xb8\xc1-\xd8] |
|
||||
\xa7[\xa1-\xc1\xd1-\xf1] |
|
||||
\xa8[\xa1-\xc0] |
|
||||
\xcf[\xa1-\xd3] |
|
||||
\xf4[\xa1-\xa6]
|
||||
)*\z/nx
|
||||
Iconv_UTF8 = /\A(?:\xef\xbb\xbf)?(?:
|
||||
[\x00-\x7f] |
|
||||
\xc2[\x80-\x8d\x90-\x9f\xa1\xaa\xac\xae-\xb1\xb4\xb6\xb8\xba\xbf] |
|
||||
\xc3[\x80-\xbf] |
|
||||
\xc4[\x80-\x93\x96-\xa2\xa4-\xab\xae-\xbf] |
|
||||
\xc5[\x80-\x8d\x90-\xbe] |
|
||||
\xc7[\x8d-\x9c\xb5] |
|
||||
\xcb[\x87\x98-\x9b\x9d] |
|
||||
\xce[\x84-\x86\x88-\x8a\x8c\x8e-\xa1\xa3-\xbf] |
|
||||
\xcf[\x80-\x8e] |
|
||||
\xd0[\x81-\x8c\x8e-\xbf] |
|
||||
\xd1[\x80-\x8f\x91-\x9f] |
|
||||
\xe2\x84[\x83\x96\xa2\xab] |
|
||||
\xe2\x86[\x83\x91-\x93\x96\xa2\xab] |
|
||||
\xe2\x87[\x83\x91-\x94\x96\xa2\xab] |
|
||||
\xe2\x88[\x82-\x83\x87-\x88\x8b\x91-\x94\x96\x9a\x9d-\x9e\xa0\xa2\xa7-\xac\xb4-\xb5\xbd] |
|
||||
\xe2\x89[\x82-\x83\x87-\x88\x8b\x91-\x94\x96\x9a\x9d-\x9e\xa0-\xa2\xa6-\xac\xb4-\xb5\xbd] |
|
||||
\xe2[\x8a\x8c][\x82-\x83\x86-\x88\x8b\x91-\x94\x96\x9a\x9d-\x9e\xa0-\xa2\xa5-\xac\xb4-\xb5\xbd] |
|
||||
\xe2[\x94-\x99][\x81-\x83\x86-\x88\x8b-\x8c\x8f-\x94\x96-\x98\x9a-\x9e\xa0-\xac\xaf-\xb0\xb3-\xb5\xb7-\xb8\xbb-\xbd\xbf] |
|
||||
\xe3\x80[\x81-\x83\x85-\x98\x9a-\x9e\xa0-\xad\xaf-\xb0\xb2-\xb5\xb7-\xb8\xbb-\xbd\xbf] |
|
||||
\xe3[\x81-\x83\xb8-\xbf][\x81-\xbf] |
|
||||
[\xe5-\xe7][\x80-\xbf][\x81-\xbf] |
|
||||
\xe8[\x80-\xae\xb0-\xbf][\x81-\xbf] |
|
||||
\xe9[\x80-\x92\x95-\xb1\xb3-\xbe][\x81-\xbf] |
|
||||
\xef[\xbc-\xbe][\x81-\xbf] |
|
||||
)*\z/nx
|
||||
RegexpShiftjis = /\A(?:
|
||||
[\x00-\x7f\xa1-\xdf] |
|
||||
[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
|
||||
)*\z/nx
|
||||
RegexpEucjp = /\A(?:
|
||||
[\x00-\x7f] |
|
||||
\x8e [\xa1-\xdf] |
|
||||
\x8f [\xa1-\xdf] [\xa1-\xdf] |
|
||||
[\xa1-\xdf] [\xa1-\xdf]
|
||||
)*\z/nx
|
||||
RegexpUtf8 = /\A(?:
|
||||
[\x00-\x7f] |
|
||||
[\xc2-\xdf] [\x80-\xbf] |
|
||||
\xe0 [\xa0-\xbf] [\x80-\xbf] |
|
||||
[\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
|
||||
\xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
|
||||
[\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
|
||||
\xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
|
||||
)*\z/nx
|
||||
|
||||
#
|
||||
# kconv
|
||||
#
|
||||
|
||||
def kconv(str, out_code, in_code = AUTO)
|
||||
opt = '-'
|
||||
case in_code
|
||||
when NKF::JIS
|
||||
when ::NKF::JIS
|
||||
opt << 'J'
|
||||
when NKF::EUC
|
||||
when ::NKF::EUC
|
||||
opt << 'E'
|
||||
when NKF::SJIS
|
||||
when ::NKF::SJIS
|
||||
opt << 'S'
|
||||
when ::NKF::UTF8
|
||||
when ::NKF::UTF16
|
||||
opt << 'W'
|
||||
end
|
||||
|
||||
case out_code
|
||||
when NKF::JIS
|
||||
when ::NKF::JIS
|
||||
opt << 'j'
|
||||
when NKF::EUC
|
||||
when ::NKF::EUC
|
||||
opt << 'e'
|
||||
when NKF::SJIS
|
||||
when ::NKF::SJIS
|
||||
opt << 's'
|
||||
when NKF::NOCONV
|
||||
when ::NKF::UTF8
|
||||
when ::NKF::UTF16
|
||||
opt << 'w'
|
||||
when ::NKF::NOCONV
|
||||
return str
|
||||
end
|
||||
|
||||
opt = '' if opt == '-'
|
||||
|
||||
NKF::nkf(opt, str)
|
||||
::NKF::nkf(opt, str)
|
||||
end
|
||||
module_function :kconv
|
||||
|
||||
#
|
||||
# Encode to
|
||||
#
|
||||
|
||||
def tojis(str)
|
||||
NKF::nkf('-j', str)
|
||||
::NKF::nkf('-j', str)
|
||||
end
|
||||
module_function :tojis
|
||||
|
||||
def toeuc(str)
|
||||
NKF::nkf('-e', str)
|
||||
::NKF::nkf('-e', str)
|
||||
end
|
||||
module_function :toeuc
|
||||
|
||||
def tosjis(str)
|
||||
NKF::nkf('-s', str)
|
||||
::NKF::nkf('-s', str)
|
||||
end
|
||||
module_function :tosjis
|
||||
|
||||
def toutf8(str)
|
||||
::NKF::nkf('-w', str)
|
||||
end
|
||||
module_function :toutf8
|
||||
|
||||
def toutf16(str)
|
||||
::NKF::nkf('-w16', str)
|
||||
end
|
||||
module_function :toutf16
|
||||
|
||||
#
|
||||
# guess
|
||||
#
|
||||
|
||||
def guess(str)
|
||||
NKF::guess(str)
|
||||
::NKF::guess(str)
|
||||
end
|
||||
module_function :guess
|
||||
|
||||
def guess_old(str)
|
||||
::NKF::guess_old(str)
|
||||
end
|
||||
module_function :guess_old
|
||||
|
||||
#
|
||||
# isEncoding
|
||||
#
|
||||
|
||||
def iseuc(str)
|
||||
RegexpEucjp.match( str )
|
||||
end
|
||||
module_function :iseuc
|
||||
|
||||
def issjis(str)
|
||||
RegexpShiftjis.match( str )
|
||||
end
|
||||
module_function :issjis
|
||||
|
||||
def isutf8(str)
|
||||
RegexpUtf8.match( str )
|
||||
end
|
||||
module_function :isutf8
|
||||
|
||||
end
|
||||
|
||||
class String
|
||||
def kconv(out_code, in_code=Kconv::AUTO)
|
||||
Kconv::kconv(self, out_code, in_code)
|
||||
end
|
||||
|
||||
# to Encoding
|
||||
def tojis
|
||||
NKF::nkf('-j', self)
|
||||
::NKF::nkf('-j', self)
|
||||
end
|
||||
def toeuc
|
||||
NKF::nkf('-e', self)
|
||||
::NKF::nkf('-e', self)
|
||||
end
|
||||
def tosjis
|
||||
NKF::nkf('-s', self)
|
||||
::NKF::nkf('-s', self)
|
||||
end
|
||||
def toutf8
|
||||
::NKF::nkf('-w', self)
|
||||
end
|
||||
def toutf16
|
||||
::NKF::nkf('-w16', self)
|
||||
end
|
||||
|
||||
# is Encoding
|
||||
def iseuc
|
||||
Kconv.iseuc( self )
|
||||
end
|
||||
|
||||
def issjis
|
||||
Kconv.issjis( self )
|
||||
end
|
||||
|
||||
def isutf8
|
||||
Kconv.isutf8( self )
|
||||
end
|
||||
end
|
||||
|
|
187
ext/nkf/nkf.c
187
ext/nkf/nkf.c
|
@ -1,51 +1,82 @@
|
|||
/*
|
||||
* NKF Module for Ruby based on nkf 2.x
|
||||
*
|
||||
* original nkf2.0 is maintained at http://sourceforge.jp/projects/nkf/
|
||||
*
|
||||
*/
|
||||
|
||||
static char *RVersion = "2.0.4.1r1";
|
||||
|
||||
#include "ruby.h"
|
||||
|
||||
/* Encoding Constants */
|
||||
#define _AUTO 0
|
||||
#define _JIS 1
|
||||
#define _EUC 2
|
||||
#define _SJIS 3
|
||||
#define _BINARY 4
|
||||
#define _NOCONV 4
|
||||
#define _ASCII 5
|
||||
/* 0b011x is reserved for UTF-8 Family */
|
||||
#define _UTF8 6
|
||||
/* 0b10xx is reserved for UTF-16 Family */
|
||||
#define _UTF16 8
|
||||
/* 0b11xx is reserved for UTF-32 Family */
|
||||
#define _UTF32 12
|
||||
#define _OTHER 16
|
||||
#define _UNKNOWN _AUTO
|
||||
|
||||
/* Replace nkf's getchar/putchar for variable modification */
|
||||
/* we never use getc, ungetc */
|
||||
|
||||
#undef getc
|
||||
#undef ungetc
|
||||
#define getc(f) (input_ctr<i_len?input[input_ctr++]:-1)
|
||||
#define ungetc(c,f) input_ctr--
|
||||
#define getc(f) (input_ctr>=i_len?-1:input[input_ctr++])
|
||||
#define ungetc(c,f) input_ctr--
|
||||
|
||||
#define INCSIZE 32
|
||||
#undef putchar
|
||||
#define putchar(c) rb_nkf_putchar(c)
|
||||
#undef TRUE
|
||||
#undef FALSE
|
||||
#define putchar(c) rb_nkf_putchar(c)
|
||||
|
||||
#define INCSIZE 32
|
||||
/* Input/Output pointers */
|
||||
|
||||
static unsigned char *output;
|
||||
static unsigned char *input;
|
||||
static int input_ctr;
|
||||
static int i_len;
|
||||
static int output_ctr;
|
||||
static int o_len;
|
||||
static int incsize;
|
||||
|
||||
static unsigned char *input, *output;
|
||||
static int input_ctr, i_len;
|
||||
static int output_ctr, o_len;
|
||||
|
||||
static VALUE dst;
|
||||
static VALUE result;
|
||||
|
||||
static int
|
||||
rb_nkf_putchar(c)
|
||||
unsigned int c;
|
||||
unsigned int c;
|
||||
{
|
||||
if (output_ctr >= o_len) {
|
||||
o_len += incsize;
|
||||
rb_str_resize(dst, o_len);
|
||||
output = RSTRING(dst)->ptr;
|
||||
rb_str_resize(result, o_len);
|
||||
incsize *= 2;
|
||||
output = RSTRING(result)->ptr;
|
||||
}
|
||||
output[output_ctr++] = c;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/* Include kanji filter main part */
|
||||
/* getchar and putchar will be replaced during inclusion */
|
||||
|
||||
#define PERL_XS 1
|
||||
#include "nkf1.7/nkf.c"
|
||||
#include "nkf-utf8/utf8tbl.c"
|
||||
#include "nkf-utf8/nkf.c"
|
||||
|
||||
static VALUE
|
||||
rb_nkf_kconv(obj, opt, src)
|
||||
VALUE obj, opt, src;
|
||||
VALUE obj, opt, src;
|
||||
{
|
||||
char *opt_ptr, *opt_end;
|
||||
volatile VALUE v;
|
||||
|
@ -58,44 +89,46 @@ rb_nkf_kconv(obj, opt, src)
|
|||
if (*opt_ptr != '-') {
|
||||
continue;
|
||||
}
|
||||
arguments(opt_ptr);
|
||||
options(opt_ptr);
|
||||
}
|
||||
|
||||
incsize = INCSIZE;
|
||||
|
||||
input_ctr = 0;
|
||||
input_ctr = 0;
|
||||
StringValue(src);
|
||||
input = RSTRING(src)->ptr;
|
||||
i_len = RSTRING(src)->len;
|
||||
dst = rb_str_new(0, i_len*3 + 10);
|
||||
v = dst;
|
||||
result = rb_str_new(0, i_len*3 + 10);
|
||||
v = result;
|
||||
|
||||
output_ctr = 0;
|
||||
output = RSTRING(dst)->ptr;
|
||||
o_len = RSTRING(dst)->len;
|
||||
output = RSTRING(result)->ptr;
|
||||
o_len = RSTRING(result)->len;
|
||||
*output = '\0';
|
||||
|
||||
if(iso8859_f && (oconv != j_oconv || !x0201_f )) {
|
||||
iso8859_f = FALSE;
|
||||
}
|
||||
if(x0201_f == WISH_TRUE)
|
||||
x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
|
||||
|
||||
kanji_convert(NULL);
|
||||
RSTRING(dst)->ptr[output_ctr] = '\0';
|
||||
RSTRING(dst)->len = output_ctr;
|
||||
OBJ_INFECT(dst, src);
|
||||
RSTRING(result)->ptr[output_ctr] = '\0';
|
||||
RSTRING(result)->len = output_ctr;
|
||||
OBJ_INFECT(result, src);
|
||||
|
||||
return dst;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* NKF.guess1
|
||||
*
|
||||
* Character code detection - Algorithm described in:
|
||||
* Ken Lunde. `Understanding Japanese Information Processing'
|
||||
* Sebastopol, CA: O'Reilly & Associates.
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_nkf_guess(obj, src)
|
||||
VALUE obj, src;
|
||||
rb_nkf_guess1(obj, src)
|
||||
VALUE obj, src;
|
||||
{
|
||||
unsigned char *p;
|
||||
unsigned char *pend;
|
||||
|
@ -107,16 +140,16 @@ rb_nkf_guess(obj, src)
|
|||
if (p == pend) return INT2FIX(_UNKNOWN);
|
||||
|
||||
#define INCR do {\
|
||||
p++;\
|
||||
if (p==pend) return INT2FIX(_UNKNOWN);\
|
||||
sequence_counter++;\
|
||||
if (sequence_counter % 2 == 1 && *p != 0xa4)\
|
||||
p++;\
|
||||
if (p==pend) return INT2FIX(_UNKNOWN);\
|
||||
sequence_counter++;\
|
||||
if (sequence_counter % 2 == 1 && *p != 0xa4)\
|
||||
sequence_counter = 0;\
|
||||
if (6 <= sequence_counter) {\
|
||||
sequence_counter = 0;\
|
||||
return INT2FIX(_EUC);\
|
||||
}\
|
||||
} while (0)
|
||||
if (6 <= sequence_counter) {\
|
||||
sequence_counter = 0;\
|
||||
return INT2FIX(_EUC);\
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
if (*p == 0xa4)
|
||||
sequence_counter = 1;
|
||||
|
@ -180,19 +213,77 @@ rb_nkf_guess(obj, src)
|
|||
return INT2FIX(_UNKNOWN);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* NKF.guess2
|
||||
*
|
||||
* Guess Encoding By NKF2.0 Routine
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_nkf_guess2(obj, src)
|
||||
VALUE obj, src;
|
||||
{
|
||||
int code = _BINARY;
|
||||
|
||||
reinit();
|
||||
|
||||
input_ctr = 0;
|
||||
StringValue(src);
|
||||
input = RSTRING(src)->ptr;
|
||||
i_len = RSTRING(src)->len;
|
||||
|
||||
if(x0201_f == WISH_TRUE)
|
||||
x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
|
||||
|
||||
guess_f = TRUE;
|
||||
kanji_convert( NULL );
|
||||
guess_f = FALSE;
|
||||
|
||||
if (!is_inputcode_mixed) {
|
||||
if (strcmp(input_codename, "") == 0) {
|
||||
code = _ASCII;
|
||||
} else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
|
||||
code = _JIS;
|
||||
} else if (strcmp(input_codename, "EUC-JP") == 0) {
|
||||
code = _EUC;
|
||||
} else if (strcmp(input_codename, "Shift_JIS") == 0) {
|
||||
code = _SJIS;
|
||||
} else if (strcmp(input_codename, "UTF-8") == 0) {
|
||||
code = _UTF8;
|
||||
} else if (strcmp(input_codename, "UTF-16") == 0) {
|
||||
code = _UTF16;
|
||||
} else if (strlen(input_codename) > 0) {
|
||||
code = _UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
return INT2FIX( code );
|
||||
}
|
||||
|
||||
|
||||
/* Initialize NKF Module */
|
||||
|
||||
void
|
||||
Init_nkf()
|
||||
{
|
||||
VALUE mKconv = rb_define_module("NKF");
|
||||
VALUE mKconv = rb_define_module("NKF");
|
||||
|
||||
rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2);
|
||||
rb_define_module_function(mKconv, "guess", rb_nkf_guess, 1);
|
||||
rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2);
|
||||
rb_define_module_function(mKconv, "guess", rb_nkf_guess1, 1);
|
||||
rb_define_module_function(mKconv, "guess1", rb_nkf_guess1, 1);
|
||||
rb_define_module_function(mKconv, "guess2", rb_nkf_guess2, 1);
|
||||
|
||||
rb_define_const(mKconv, "AUTO", INT2FIX(_AUTO));
|
||||
rb_define_const(mKconv, "JIS", INT2FIX(_JIS));
|
||||
rb_define_const(mKconv, "EUC", INT2FIX(_EUC));
|
||||
rb_define_const(mKconv, "SJIS", INT2FIX(_SJIS));
|
||||
rb_define_const(mKconv, "BINARY", INT2FIX(_BINARY));
|
||||
rb_define_const(mKconv, "NOCONV", INT2FIX(_NOCONV));
|
||||
rb_define_const(mKconv, "UNKNOWN", INT2FIX(_UNKNOWN));
|
||||
rb_define_const(mKconv, "AUTO", INT2FIX(_AUTO));
|
||||
rb_define_const(mKconv, "JIS", INT2FIX(_JIS));
|
||||
rb_define_const(mKconv, "EUC", INT2FIX(_EUC));
|
||||
rb_define_const(mKconv, "SJIS", INT2FIX(_SJIS));
|
||||
rb_define_const(mKconv, "BINARY", INT2FIX(_BINARY));
|
||||
rb_define_const(mKconv, "NOCONV", INT2FIX(_NOCONV));
|
||||
rb_define_const(mKconv, "ASCII", INT2FIX(_ASCII));
|
||||
rb_define_const(mKconv, "UTF8", INT2FIX(_UTF8));
|
||||
rb_define_const(mKconv, "UTF16", INT2FIX(_UTF16));
|
||||
rb_define_const(mKconv, "UTF32", INT2FIX(_UTF32));
|
||||
rb_define_const(mKconv, "UNKNOWN", INT2FIX(_UNKNOWN));
|
||||
rb_define_const(mKconv, "VERSION", rb_str_new2(RVersion));
|
||||
}
|
||||
|
|
568
ext/nkf/test.rb
568
ext/nkf/test.rb
|
@ -1,3 +1,19 @@
|
|||
#!/usr/local/bin/ruby
|
||||
#
|
||||
# nkf test program for nkf 1.7
|
||||
# Shinji KONO <kono@ie.u-ryukyu.ac.jp>
|
||||
# Sun Aug 18 12:25:40 JST 1996
|
||||
# Sun Nov 8 00:16:06 JST 1998
|
||||
#
|
||||
# This is useful when you add new patch on nkf.
|
||||
# Since this test is too strict, failures may not mean
|
||||
# wrong conversion.
|
||||
#
|
||||
# nkf 1.5 differs on MIME decoding
|
||||
# nkf 1.4 passes Basic Conversion tests
|
||||
# nkf PDS version passes Basic Conversion tests using "nkf -iB -oB "
|
||||
#
|
||||
|
||||
$counter = 0
|
||||
def result(result, message = nil)
|
||||
$counter += 1
|
||||
|
@ -49,41 +65,150 @@ end
|
|||
|
||||
|
||||
$detail = false
|
||||
def test(opt, input, expect)
|
||||
def test(opt, input, expects)
|
||||
print "\nINPUT:\n", input if $detail
|
||||
print "\nEXPECT:\n", expect if $detail
|
||||
print "\nEXPECT:\n", expects.to_s if $detail
|
||||
result = nkf(opt, input)
|
||||
print "\nGOT:\n", result if $detail
|
||||
|
||||
print result == expect ? "Ok\n" : "Fail\n"
|
||||
return result
|
||||
expects.each do |e|
|
||||
if result == e then
|
||||
puts "Ok"
|
||||
return result
|
||||
end
|
||||
end
|
||||
puts "Fail"
|
||||
end
|
||||
|
||||
# Basic Conversion
|
||||
print "\nBasic Conversion test\n\n"
|
||||
|
||||
example = {}
|
||||
example = Hash.new
|
||||
|
||||
# Basic Conversion
|
||||
print "\nBasic Conversion test\n\n";
|
||||
|
||||
# I gave up simple literal quote because there are big difference
|
||||
# on perl4 and perl5 on literal quote. Of course we cannot use
|
||||
# jperl.
|
||||
|
||||
example['jis'] = <<'eofeof'.unpack('u')[0]
|
||||
M1FER<W0@4W1A9V4@&R1"(3DQ(3%^2R%+?D]3&RA"(%-E8V]N9"!3=&%G92`;
|
||||
M)$)0)TU:&RA"($AI<F%G86YA(!LD0B0B)"0D)B0H)"HD;R1R)',;*$(*2V%T
|
||||
M86MA;F$@&R1")2(E)"4F)2@E*B5O)7(E<QLH0B!+:6=O=2`;)$(A)B%G(S`C
|
||||
/029!)E@G(B=!*$`;*$(*
|
||||
eofeof
|
||||
#'
|
||||
|
||||
example['sjis'] = <<'eofeof'.unpack('u')[0]
|
||||
M1FER<W0@4W1A9V4@@5B)0(F>ED"6GIAR(%-E8V]N9"!3=&%G92"8I9=Y($AI
|
||||
M<F%G86YA((*@@J*"I(*F@JB"[8+P@O$*2V%T86MA;F$@@T�X-%@T>#28./
|
||||
>@Y*#DR!+:6=O=2"!18&'@D^"8(._@]:$081@A+X*
|
||||
eofeof
|
||||
#'
|
||||
|
||||
example['euc'] = <<'eofeof'.unpack('u')[0]
|
||||
M1FER<W0@4W1A9V4@H;FQH;'^RZ'+_L_3(%-E8V]N9"!3=&%G92#0I\W:($AI
|
||||
M<F%G86YA(*2BI*2DIJ2HI*JD[Z3RI/,*2V%T86MA;F$@I:*EI*6FI:BEJJ7O
|
||||
>I?*E\R!+:6=O=2"AIJ'GH["CP:;!IMBGHJ?!J,`*
|
||||
eofeof
|
||||
#'
|
||||
|
||||
example['utf'] = <<'eofeof'.unpack('u')[0]
|
||||
M1FER<W0@4W1A9V4@XX"%Z9FBY;^<YK.5YKJ`Z(65(%-E8V]N9"!3=&%G92#D
|
||||
MN+SI@:4@2&ER86=A;F$@XX&"XX&$XX&&XX&(XX&*XX*/XX*2XX*3"DMA=&%K
|
||||
M86YA(.."HN."I.."IN."J.."JN.#K^.#LN.#LR!+:6=O=2#C@[OBB)[OO)#O
|
||||
.O*'.L<^)T)'0K^*5@@H`
|
||||
eofeof
|
||||
|
||||
|
||||
example['jis1'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1";3%Q<$$L&RA""ALD0F4Z3F\;*$(*&R1"<FT;*$()&R1"/F5.3D]+&RA"
|
||||
#"0D*
|
||||
eofeof
|
||||
|
||||
example['sjis1'] = <<'eofeof'.unpack('u')[0]
|
||||
8YU#ID)%+"N-9E^T*Z>L)C^.7S)AJ"0D*
|
||||
eofeof
|
||||
|
||||
example['euc1'] = <<'eofeof'.unpack('u')[0]
|
||||
8[;'Q\,&L"N6ZSN\*\NT)ON7.SL_+"0D*
|
||||
eofeof
|
||||
|
||||
example['utf1'] = <<'eofeof'.unpack('u')[0]
|
||||
AZ+J%Z:N/Z8JM"N>VNNFZEPKIM(D)Y+B*Z:"8Y+J8"0D*
|
||||
eofeof
|
||||
|
||||
example['jis2'] = <<'eofeof'.unpack('u')[0]
|
||||
+&R1".EA&(QLH0@H`
|
||||
eofeof
|
||||
|
||||
example['sjis2'] = <<'eofeof'.unpack('u')[0]
|
||||
%C=:3H0H`
|
||||
eofeof
|
||||
|
||||
example['euc2'] = <<'eofeof'.unpack('u')[0]
|
||||
%NMC&HPH`
|
||||
eofeof
|
||||
|
||||
example['utf2'] = <<'eofeof'.unpack('u')[0]
|
||||
'YI:.Z)>D"@``
|
||||
eofeof
|
||||
|
||||
# From JIS
|
||||
|
||||
print "JIS to JIS ... ";test('-j',example['jis'],[example['jis']]);
|
||||
print "JIS to SJIS... ";test('-s',example['jis'],[example['sjis']]);
|
||||
print "JIS to EUC ... ";test('-e',example['jis'],[example['euc']]);
|
||||
print "JIS to UTF8... ";test('-w',example['jis'],[example['utf']]);
|
||||
|
||||
# From SJIS
|
||||
|
||||
print "SJIS to JIS ... ";test('-j',example['sjis'],[example['jis']]);
|
||||
print "SJIS to SJIS... ";test('-s',example['sjis'],[example['sjis']]);
|
||||
print "SJIS to EUC ... ";test('-e',example['sjis'],[example['euc']]);
|
||||
print "SJIS to UTF8... ";test('-w',example['sjis'],[example['utf']]);
|
||||
|
||||
# From EUC
|
||||
|
||||
print "EUC to JIS ... ";test('-j',example['euc'],[example['jis']]);
|
||||
print "EUC to SJIS... ";test('-s',example['euc'],[example['sjis']]);
|
||||
print "EUC to EUC ... ";test('-e',example['euc'],[example['euc']]);
|
||||
print "EUC to UTF8... ";test('-w',example['euc'],[example['utf']]);
|
||||
|
||||
# From UTF8
|
||||
|
||||
print "UTF8 to JIS ... ";test('-j',example['utf'],[example['jis']]);
|
||||
print "UTF8 to SJIS... ";test('-s',example['utf'],[example['sjis']]);
|
||||
print "UTF8 to EUC ... ";test('-e',example['utf'],[example['euc']]);
|
||||
print "UTF8 to UTF8... ";test('-w',example['utf'],[example['utf']]);
|
||||
|
||||
|
||||
|
||||
# From JIS
|
||||
|
||||
print "JIS to JIS ... ";test('-j',example['jis1'],[example['jis1']]);
|
||||
print "JIS to SJIS... ";test('-s',example['jis1'],[example['sjis1']]);
|
||||
print "JIS to EUC ... ";test('-e',example['jis1'],[example['euc1']]);
|
||||
print "JIS to UTF8... ";test('-w',example['jis1'],[example['utf1']]);
|
||||
|
||||
# From SJIS
|
||||
|
||||
print "SJIS to JIS ... ";test('-j',example['sjis1'],[example['jis1']]);
|
||||
print "SJIS to SJIS... ";test('-s',example['sjis1'],[example['sjis1']]);
|
||||
print "SJIS to EUC ... ";test('-e',example['sjis1'],[example['euc1']]);
|
||||
print "SJIS to UTF8... ";test('-w',example['sjis1'],[example['utf1']]);
|
||||
|
||||
# From EUC
|
||||
|
||||
print "EUC to JIS ... ";test('-j',example['euc1'],[example['jis1']]);
|
||||
print "EUC to SJIS... ";test('-s',example['euc1'],[example['sjis1']]);
|
||||
print "EUC to EUC ... ";test('-e',example['euc1'],[example['euc1']]);
|
||||
print "EUC to UTF8... ";test('-w',example['euc1'],[example['utf1']]);
|
||||
|
||||
# From UTF8
|
||||
|
||||
print "UTF8 to JIS ... ";test('-j',example['utf1'],[example['jis1']]);
|
||||
print "UTF8 to SJIS... ";test('-s',example['utf1'],[example['sjis1']]);
|
||||
print "UTF8 to EUC ... ";test('-e',example['utf1'],[example['euc1']]);
|
||||
print "UTF8 to UTF8... ";test('-w',example['utf1'],[example['utf1']]);
|
||||
|
||||
# Ambiguous Case
|
||||
|
||||
example['amb'] = <<'eofeof'.unpack('u')[0]
|
||||
MI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&E
|
||||
|
@ -117,6 +242,31 @@ M)4(;*$(*&RA))4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q
|
|||
>)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(;*$(*
|
||||
eofeof
|
||||
|
||||
print "Ambiguous Case. ";
|
||||
test('-j',example['amb'],[example['amb.euc']]);
|
||||
|
||||
# Input assumption
|
||||
|
||||
print "SJIS Input assumption ";
|
||||
test('-jSx',example['amb'],[example['amb.sjis']]);
|
||||
|
||||
# Broken JIS
|
||||
|
||||
print "Broken JIS ";
|
||||
$input = example['jis'];
|
||||
$input.gsub("\033",'');
|
||||
test('-Be',$input,[example['euc']]);
|
||||
print "Broken JIS is safe on Normal JIS? ";
|
||||
$input = example['jis'];
|
||||
test('-Be',$input,[example['euc']]);
|
||||
|
||||
# X0201 仮名 (kana)
|
||||
# X0201->X0208 conversion
|
||||
# X0208 alphabet -> ASCII
|
||||
# X0201 相互変換 (mutual conversion)
|
||||
|
||||
print "\nX0201 test\n\n";
|
||||
|
||||
example['x0201.sjis'] = <<'eofeof'.unpack('u')[0]
|
||||
MD5.*<(-*@TR#3H-0@U*#2X--@T^#48-3"I%3B7""8()A@F*"8X)D@F6"9H*!
|
||||
M@H*"@X*$@H6"AH*'"I%3BTR-AH%)@9>!E(&0@9.!3X&5@9:!:8%J@7R!>X&!
|
||||
|
@ -124,7 +274,6 @@ M@6V!;H%O@7"!CPJ4O(IPMK>X/;FZMMZWWKC>N=ZZWH+&"I2\BG#*W\O?S-_-
|
|||
MW\[?M]^QW@K*W\O?S`IH86YK86MU(,K?R]_,I`K*W\O?S-VA"I2\BG""S(SC
|
||||
!"@!"
|
||||
eofeof
|
||||
#'
|
||||
|
||||
example['x0201.euc'] = <<'eofeof'.unpack('u')[0]
|
||||
MP;2ST:6KI:VEKZ6QI;.EK*6NI;"ELJ6T"L&TL=&CP:/"H\.CQ*/%H\:CQZ/A
|
||||
|
@ -134,7 +283,17 @@ MWJ3("LB^L]&.RH[?CLN.WX[,CM^.S8[?CLZ.WXZWCM^.L8[>"H[*CM^.RX[?
|
|||
MCLP*:&%N:V%K=2".RH[?CLN.WX[,CJ0*CLJ.WX[+CM^.S([=CJ$*R+ZST:3.
|
||||
#N.4*
|
||||
eofeof
|
||||
#'
|
||||
|
||||
example['x0201.utf'] = <<'eofeof'.unpack('u')[0]
|
||||
MY86HZ*>2XX*KXX*MXX*OXX*QXX*SXX*LXX*NXX*PXX*RXX*T"N6%J.B+L>^\
|
||||
MH>^\HN^\H^^\I.^\I>^\IN^\I^^]@>^]@N^]@^^]A.^]A>^]AN^]APKEA:CH
|
||||
MJ)CEC[?OO('OO*#OO(/OO(3OO(7OO+[OO(;OO(KOO(COO(GBB)+OO(OOO)WO
|
||||
MO+OOO+WOO9OOO9WOOZ4*Y8V*Z*>2[[VV[[VW[[VX/>^]N>^]NN^]MN^^GN^]
|
||||
MM^^^GN^]N.^^GN^]N>^^GN^]NN^^GN.!J`KEC8KHIY+OOHKOOI_OOHOOOI_O
|
||||
MOHSOOI_OOHWOOI_OOH[OOI_OO;?OOI_OO;'OOIX*[[Z*[[Z?[[Z+[[Z?[[Z,
|
||||
M"FAA;FMA:W4@[[Z*[[Z?[[Z+[[Z?[[Z,[[VD"N^^BN^^G^^^B^^^G^^^C.^^
|
||||
2G>^]H0KEC8KHIY+C@:[EOHP*
|
||||
eofeof
|
||||
|
||||
example['x0201.jis'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"030S424K)2TE+R4Q)3,E+"4N)3`E,B4T&RA""ALD0D$T,5$C02-"(T,C
|
||||
|
@ -144,7 +303,6 @@ M/1LH23DZ-EXW7CA>.5XZ7ALD0B1(&RA""ALD0D@^,U$;*$E*7TM?3%]-7TY?
|
|||
M-U\Q7ALH0@H;*$E*7TM?3!LH0@IH86YK86MU(!LH24I?2U],)!LH0@H;*$E*
|
||||
97TM?3%TA&RA""ALD0D@^,U$D3CAE&RA""@``
|
||||
eofeof
|
||||
#`
|
||||
|
||||
example['x0201.sosi'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"030S424K)2TE+R4Q)3,E+"4N)3`E,B4T&RA*"ALD0D$T,5$C02-"(T,C
|
||||
|
@ -154,7 +312,6 @@ M*$H]#CDZ-EXW7CA>.5XZ7@\;)$(D2!LH2@H;)$)(/C-1&RA*#DI?2U],7TU?
|
|||
M3E\W7S%>#PH.2E]+7TP/&RA*"FAA;FMA:W4@#DI?2U],)`\;*$H*#DI?2U],
|
||||
672$/&RA*"ALD0D@^,U$D3CAE&RA""@``
|
||||
eofeof
|
||||
#"
|
||||
|
||||
example['x0201.x0208'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"030S424K)2TE+R4Q)3,E+"4N)3`E,B4T&RA""ALD0D$T,5$;*$)!0D-$
|
||||
|
@ -164,7 +321,34 @@ M)$)(/C-1)5$E5"57)5HE724M(2PE(B$K&RA""ALD0B51)50E51LH0@IH86YK
|
|||
M86MU(!LD0B51)50E52$B&RA""ALD0B51)50E525S(2,;*$(*&R1"2#XS421.
|
||||
&.&4;*$(*
|
||||
eofeof
|
||||
#`
|
||||
|
||||
# -X is necessary to allow X0201 in SJIS
|
||||
# -Z convert X0208 alphabet to ASCII
|
||||
print "X0201 conversion: SJIS ";
|
||||
test('-jXZ',example['x0201.sjis'],[example['x0201.x0208']]);
|
||||
print "X0201 conversion: JIS ";
|
||||
test('-jZ',example['x0201.jis'],[example['x0201.x0208']]);
|
||||
print "X0201 conversion:SI/SO ";
|
||||
test('-jZ',example['x0201.sosi'],[example['x0201.x0208']]);
|
||||
print "X0201 conversion: EUC ";
|
||||
test('-jZ',example['x0201.euc'],[example['x0201.x0208']]);
|
||||
print "X0201 conversion: UTF8 ";
|
||||
test('-jZ',example['x0201.utf'],[example['x0201.x0208']]);
|
||||
# -x means X0201 output
|
||||
print "X0201 output: SJIS ";
|
||||
test('-xs',example['x0201.euc'],[example['x0201.sjis']]);
|
||||
print "X0201 output: JIS ";
|
||||
test('-xj',example['x0201.sjis'],[example['x0201.jis']]);
|
||||
print "X0201 output: EUC ";
|
||||
test('-xe',example['x0201.jis'],[example['x0201.euc']]);
|
||||
print "X0201 output: UTF8 ";
|
||||
test('-xw',example['x0201.jis'],[example['x0201.utf']]);
|
||||
|
||||
# MIME decode
|
||||
|
||||
print "\nMIME test\n\n";
|
||||
|
||||
# MIME ISO-2022-JP
|
||||
|
||||
example['mime.iso2022'] = <<'eofeof'.unpack('u')[0]
|
||||
M/3])4T\M,C`R,BU*4#]"/T=Y4D%.144W96E23TI566Q/4U9)1WEH2S\]"CT_
|
||||
|
@ -178,7 +362,6 @@ M96E23U!Y:S=D"FAS;U-G/3T_/2`]/TE33RTR,`HR,BU*4#]"/T=Y4D%.144W
|
|||
M96E23U!Y:S=D:'-O4V<]/3\]"CT_25-/+3(P,C(M2E`_0C]'>5)!3D5%-V5I
|
||||
44D]*55EL3QM;2U-624=Y:$L_/0H_
|
||||
eofeof
|
||||
#'
|
||||
|
||||
example['mime.ans.strict'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11
|
||||
|
@ -188,7 +371,6 @@ M/3])4T\M,C`R,BU*4#]"/T=Y4D%.144W96E23U!Y:S=D"FAS;U-G/3T_/2`]
|
|||
M/TE33RTR,`HR,BU*4#]"/T=Y4D%.144W96E23U!Y:S=D:'-O4V<]/3\]"CT_
|
||||
L25-/+3(P,C(M2E`_0C]'>5)!3D5%-V5I4D]*55EL3QM;2U-624=Y:$L_/0H_
|
||||
eofeof
|
||||
#'
|
||||
|
||||
example['mime.unbuf.strict'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11
|
||||
|
@ -206,7 +388,6 @@ M(&QI;F4*&R1"-$$[>B1./RD[=C1!.WHD3C\I.W8;*$(*0G)O:V5N(&-A<V4*
|
|||
M&R1"-$$[>B1./RD;*$)H<V]39ST]/ST@&R1"-$$[>B1./RD[=ALH0@H;)$(T
|
||||
603MZ)$XE1ALH0EM+4U9)1WEH2S\]"@`*
|
||||
eofeof
|
||||
#"
|
||||
|
||||
example['mime.unbuf'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11
|
||||
|
@ -215,21 +396,48 @@ M(&QI;F4*&R1"-$$[>B1./RD[=C1!.WHD3C\I.W8;*$(*0G)O:V5N(&-A<V4*
|
|||
M&R1"-$$[>B1./RD;*$)H<V]39ST]/ST@&R1"-$$[>B1./RD[=ALH0@H;)$(T
|
||||
603MZ)$XE1ALH0EM+4U9)1WEH2S\]"@`*
|
||||
eofeof
|
||||
#"
|
||||
|
||||
example['mime.base64'] = <<'eofeof'.unpack('u')[0]
|
||||
M9W-M5"])3&YG<FU#>$I+-&=Q=4,S24LS9W%Q0E%:3TUI-39,,S0Q-&=S5T)1
|
||||
M43!+9VUA1%9O3T@*9S)+1%1O3'=K8C)1;$E+;V=Q2T-X24MG9W5M0W%*3EEG
|
||||
<<T=#>$E+9V=U;4,X64Q&9W)70S592VMG<6U""F=Q
|
||||
eofeof
|
||||
#"
|
||||
|
||||
example['mime.base64.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1")$M&?B1I)#LD1D0Z)"TD7B0Y)"PA(D5L-7XV83E9)$<A(ALH0@T*&R1"
|
||||
M(T<E-R5G)4,E+R1R0C\_="0J)"0D1B0B)&LD*D4Y)$,D1B0B)&LD<R1')#<D
|
||||
(9R0F)"L;*$(E
|
||||
eofeof
|
||||
#'
|
||||
|
||||
# print "Next test is expected to Fail.\n";
|
||||
print "MIME decode (strict) ";
|
||||
$tmp = test('-jmS',example['mime.iso2022'],[example['mime.ans.strict']]);
|
||||
|
||||
example['mime.ans.alt'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11
|
||||
M&RA"96YD"ALD0B0])"8D*R1*&RA"&R1"-$$[>B1./RD[=ALH0F5N9&]F;&EN
|
||||
M90H;)$(T03MZ)$X_*3MV-$$[>B1./RD[=ALH0@I"<F]K96YC87-E"ALD0C1!
|
||||
H.WHD3C\I.W8T03MZ)$X_*3MV&RA""ALD0C1!.WHD3B5&)3DE)!LH0@``
|
||||
eofeof
|
||||
|
||||
example['mime.unbuf.alt'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11
|
||||
M&RA"96YD"ALD0B0])"8D*R1*&RA"&R1"-$$[>B1./RD[=ALH0F5N9&]F;&EN
|
||||
M90H;)$(T03MZ)$X_*3MV-$$[>B1./RD[=ALH0@I"<F]K96YC87-E"ALD0C1!
|
||||
H.WHD3C\I.W8T03MZ)$X_*3MV&RA""ALD0C1!.WHD3B5&)3DE)!LH0@``
|
||||
eofeof
|
||||
|
||||
print "MIME decode (nonstrict)";
|
||||
$tmp = test('-jmN',example['mime.iso2022'],[example['mime.ans'],example['mime.ans.alt']]);
|
||||
# open(OUT,">tmp1");print OUT pack('u',$tmp);close(OUT);
|
||||
# unbuf mode implies more pessimistic decode
|
||||
print "MIME decode (unbuf) ";
|
||||
$tmp = test('-jmNu',example['mime.iso2022'],[example['mime.unbuf'],example['mime.unbuf.alt']]);
|
||||
# open(OUT,">tmp2");print OUT pack('u',$tmp);close(OUT);
|
||||
print "MIME decode (base64) ";
|
||||
test('-jTmB',example['mime.base64'],[example['mime.base64.ans']]);
|
||||
|
||||
# MIME ISO-8859-1
|
||||
|
||||
example['mime.is8859'] = <<'eofeof'.unpack('u')[0]
|
||||
M/3])4T\M.#@U.2TQ/U$_*CU#-V%V83\_/2`*4&5E<B!4]G)N9W)E;@I,87-S
|
||||
|
@ -244,75 +452,255 @@ M("!<(")-:6X@:V%E<&AE<W0@:&%R(&9A865T(&5T(&9O96PA(@I!87)H=7,@
|
|||
M56YI=F5R<VET>2P@1$5.34%22R`@7"`B36EN(&OF<&AE<W0@:&%R(&;E970@
|
||||
)970@9OAL(2(*
|
||||
eofeof
|
||||
#"
|
||||
|
||||
print 'JIS to JIS ... '; test(' ', example['jis'], example['jis'])
|
||||
print 'JIS to SJIS... '; test('-s', example['jis'], example['sjis'])
|
||||
print 'JIS to EUC ... '; test('-e', example['jis'], example['euc'])
|
||||
|
||||
print 'SJIS to JIS ... '; test('-j', example['sjis'], example['jis'])
|
||||
print 'SJIS to SJIS... '; test('-s', example['sjis'], example['sjis'])
|
||||
print 'SJIS to EUC ... '; test('-e', example['sjis'], example['euc'])
|
||||
|
||||
print 'EUC to JIS ... '; test(' ', example['euc'], example['jis'])
|
||||
print 'EUC to SJIS... '; test('-s', example['euc'], example['sjis'])
|
||||
print 'EUC to EUC ... '; test('-e', example['euc'], example['euc'])
|
||||
|
||||
|
||||
# Ambiguous Case
|
||||
print 'Ambiguous Case. '; test('' , example['amb'], example['amb.euc'])
|
||||
|
||||
# Input assumption
|
||||
print 'SJIS Input assumption '
|
||||
test('-Sx', example['amb'], example['amb.sjis'])
|
||||
|
||||
# X0201 仮名 (kana)
|
||||
# X0201->X0208 conversion
|
||||
# X0208 alphabet -> ASCII
|
||||
# X0201 相互変換 (mutual conversion)
|
||||
|
||||
print "\nX0201 test\n\n"
|
||||
|
||||
# -X is necessary to allow X0201 in SJIS
|
||||
# -Z convert X0208 alphabet to ASCII
|
||||
print 'X0201 conversion: SJIS '
|
||||
test('-XZ', example['x0201.sjis'], example['x0201.x0208'])
|
||||
print 'X0201 conversion: JIS '
|
||||
test('-Z', example['x0201.jis'], example['x0201.x0208'])
|
||||
print 'X0201 conversion:SI/SO '
|
||||
test('-Z', example['x0201.sosi'], example['x0201.x0208'])
|
||||
print 'X0201 conversion: EUC '
|
||||
test('-Z', example['x0201.euc'], example['x0201.x0208'])
|
||||
# -x means X0201 output
|
||||
print 'X0201 output: SJIS '
|
||||
test('-xs', example['x0201.euc'], example['x0201.sjis'])
|
||||
print 'X0201 output: JIS '
|
||||
test('-xj', example['x0201.sjis'], example['x0201.jis'])
|
||||
print 'X0201 output: EUC '
|
||||
test('-xe', example['x0201.jis'], example['x0201.euc'])
|
||||
|
||||
# MIME decode
|
||||
|
||||
print "\nMIME test\n\n"
|
||||
|
||||
# MIME ISO-2022-JP
|
||||
|
||||
print "Next test is expeced to Fail.\n"
|
||||
|
||||
print 'MIME decode (strict) '
|
||||
tmp = test('-m', example['mime.iso2022'], example['mime.ans.strict'])
|
||||
print 'MIME decode (nonstrict)'
|
||||
tmp = test('-m', example['mime.iso2022'], example['mime.ans'])
|
||||
# open(OUT,'>tmp1');print OUT pack('u',$tmp);close(OUT);
|
||||
# unbuf mode implies more pessimistic decode
|
||||
print 'MIME decode (unbuf) '
|
||||
test('-mu', example['mime.iso2022'], example['mime.unbuf'])
|
||||
print 'MIME decode (base64) '
|
||||
t = test('-mB', example['mime.base64'], example['mime.base64.ans'])
|
||||
|
||||
# MIME ISO-8859-1
|
||||
|
||||
# Without -l, ISO-8859-1 was handled as X0201.
|
||||
|
||||
print 'MIME ISO-8859-1 (Q) '
|
||||
test('-ml', example['mime.is8859'], example['mime.is8859.ans'])
|
||||
print "MIME ISO-8859-1 (Q) ";
|
||||
test('-ml',example['mime.is8859'],[example['mime.is8859.ans']]);
|
||||
|
||||
# test for -f is not so simple.
|
||||
|
||||
print "\nBug Fixes\n\n";
|
||||
|
||||
# test_data/cr
|
||||
|
||||
example['test_data/cr'] = <<'eofeof'.unpack('u')[0]
|
||||
1I,:DN:3(#71E<W0-=&5S=`T`
|
||||
eofeof
|
||||
|
||||
example['test_data/cr.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
7&R1")$8D.21(&RA""G1E<W0*=&5S=`H`
|
||||
eofeof
|
||||
|
||||
print "test_data/cr ";
|
||||
test('-jd',example['test_data/cr'],[example['test_data/cr.ans']]);
|
||||
# test_data/fixed-qencode
|
||||
|
||||
example['test_data/fixed-qencode'] = <<'eofeof'.unpack('u')[0]
|
||||
M("`@("`@("`],4(D0CYE/STS1#TQ0BA""B`@("`@("`@/3%")$(^93TS1CTS
|
||||
'1#TQ0BA""@``
|
||||
eofeof
|
||||
|
||||
example['test_data/fixed-qencode.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
F("`@("`@("`;)$(^93\]&RA""B`@("`@("`@&R1"/F4_/1LH0@H`
|
||||
eofeof
|
||||
|
||||
print "test_data/fixed-qencode ";
|
||||
test('-jmQ',example['test_data/fixed-qencode'],[example['test_data/fixed-qencode.ans']]);
|
||||
# test_data/long-fold-1
|
||||
|
||||
example['test_data/long-fold-1'] = <<'eofeof'.unpack('u')[0]
|
||||
MI,JDK*2DI,JDK*2DI,JDK*'!I*2DKJ3GI*:DK*2BI.JDWJ2WI,:AHJ2SI.RD
|
||||
M\J2]I,ZDWJ3>I**DQ*2KI*:DR*&BI,FDIJ3BI-^DT*2HI*RD[Z3KI*2DMZ&B
|
||||
MI,BDP:3EI*:DQZ3!I.>D\Z2NI.RDZZ2KI.*DMZ3SI,JDI*&C"J2SI+.DSR!#
|
||||
M4B],1B"DSKG4H:,-"J2SI+.DSR!#4B"DSKG4H:,-I+.DLZ3/($Q&+T-2(*3.
|
||||
9N=2AHPH-"J2SI+.DSR!,1B"DSKG4H:,*"@``
|
||||
eofeof
|
||||
|
||||
example['test_data/long-fold-1.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1")$HD+"0D)$HD+"0D)$HD+"%!)"0D+B1G)"8D+"0B)&HD7B0W)$8A(B0S
|
||||
M)&PD<B0])$XD7B1>)"(D1"0K&RA""ALD0B0F)$@A(B1))"8D8B1?)%`D*"0L
|
||||
M)&\D:R0D)#<A(B1()$$D920F)$<D021G)',D+B1L)&LD*R1B)#<D<QLH0@H;
|
||||
M)$(D2B0D(2,;*$(*&R1")#,D,R1/&RA"($-2+TQ&(!LD0B1..50A(QLH0@H;
|
||||
M)$(D,R0S)$\;*$(@0U(@&R1")$XY5"$C&RA""ALD0B0S)#,D3QLH0B!,1B]#
|
||||
M4B`;)$(D3CE4(2,;*$(*"ALD0B0S)#,D3QLH0B!,1B`;)$(D3CE4(2,;*$(*
|
||||
!"@``
|
||||
eofeof
|
||||
|
||||
print "test_data/long-fold-1 ";
|
||||
test('-jTF60',example['test_data/long-fold-1'],[example['test_data/long-fold-1.ans']]);
|
||||
# test_data/long-fold
|
||||
|
||||
example['test_data/long-fold'] = <<'eofeof'.unpack('u')[0]
|
||||
MI,JDK*2DI,JDK*2DI,JDK*'!I*2DKJ3GI*:DK*2BI.JDWJ2WI,:AHJ2SI.RD
|
||||
M\J2]I,ZDWJ3>I**DQ*2KI*:DR*&BI,FDIJ3BI-^DT*2HI*RD[Z3KI*2DMZ&B
|
||||
MI,BDP:3EI*:DQZ3!I.>D\Z2NI.RDZZ2KI.*DMZ3SI,JDI*&C"J2SI+.DS\.[
|
||||
'I*2YU*&C"@``
|
||||
eofeof
|
||||
|
||||
example['test_data/long-fold.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1")$HD+"0D)$HD+"0D)$HD+"%!)"0D+B1G)"8D+"0B)&HD7B0W)$8A(B0S
|
||||
M)&PD<B0])$XD7B1>)"(D1"0K&RA""ALD0B0F)$@A(B1))"8D8B1?)%`D*"0L
|
||||
M)&\D:R0D)#<A(B1()$$D920F)$<D021G)',D+B1L)&LD*R1B)#<D<QLH0@H;
|
||||
:)$(D2B0D(2,D,R0S)$]#.R0D.50A(QLH0@H`
|
||||
eofeof
|
||||
|
||||
print "test_data/long-fold ";
|
||||
test('-jTf60',example['test_data/long-fold'],[example['test_data/long-fold.ans']]);
|
||||
# test_data/mime_out
|
||||
|
||||
example['test_data/mime_out'] = <<'eofeof'.unpack('u')[0]
|
||||
M"BTM+2T*4W5B:F5C=#H@86%A82!A86%A(&%A86$@86%A82!A86%A(&%A86$@
|
||||
M86%A82!A86%A(&%A86$@86%A82!A86%A(&%A86$@86%A82!A86%A"BTM+2T*
|
||||
M4W5B:F5C=#H@I**DI*2FI*BDJJ2KI*VDKZ2QI+.DM:2WI+FDNZ2]I+^DP:3$
|
||||
MI,:DR*3*I,NDS*3-I,ZDSZ32I-6DV*3;I-ZDWZ3@I.&DXJ3DI*2DYJ2HI.@*
|
||||
M+2TM+0I3=6)J96-T.B!A86%A(&%A86$@86%A82!A86%A(&%A86$@86%A82!A
|
||||
I86%A(*2BI*2DIJ2HI*H@86%A82!A86%A(&%A86$@86%A80HM+2TM"@H`
|
||||
eofeof
|
||||
|
||||
example['test_data/mime_out.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
M"BTM+2T*4W5B:F5C=#H@86%A82!A86%A(&%A86$@86%A82!A86%A(&%A86$@
|
||||
M86%A82!A86%A(&%A86$*(&%A86$@86%A82!A86%A(&%A86$@86%A80HM+2TM
|
||||
M"E-U8FIE8W0Z(#T_25-/+3(P,C(M2E`_0C]'>5)#2D-):TI#46U*0V=K2VE1
|
||||
M<DI#,&M,>5%X2D1-:TY343-*1&MK3WAS;U%G/3T_/2`*"3T_25-/+3(P,C(M
|
||||
M2E`_0C]'>5)#2D0P:U!Y4D)*15%K4FE224I%;VM3>5)-2D4P:U1I4E!*1DEK
|
||||
M5E-264=Y:$,_/2`*"3T_25-/+3(P,C(M2E`_0C]'>5)#2D9S:UAI4F9*1T%K
|
||||
M65-2:4I'46M*0U)M2D-G:V%"<V]19ST]/ST@"BTM+2T*4W5B:F5C=#H@86%A
|
||||
M82!A86%A(&%A86$@86%A82!A86%A(&%A86$@86%A82`]/TE33RTR,#(R+4I0
|
||||
M/T(_1WE20TI#26)+14D]/ST@"@D]/TE33RTR,#(R+4I0/T(_1WE20TI#46M*
|
||||
J:5%O2D-O8DM%23T_/2`@86%A80H@86%A82!A86%A(&%A86$*+2TM+0H*
|
||||
eofeof
|
||||
|
||||
print "test_data/mime_out ";
|
||||
test('-jM',example['test_data/mime_out'],[example['test_data/mime_out.ans']]);
|
||||
# test_data/multi-line
|
||||
|
||||
example['test_data/multi-line'] = <<'eofeof'.unpack('u')[0]
|
||||
MI,JDK*2DI,JDK*2DI,JDK*'!I*2DKJ3GI*:DK*2BI.JDWJ2WI,:AH@"DLZ3L
|
||||
MI/*DO:3.I-ZDWJ2BI,2DJZ2FI,BAHJ3)I*:DXJ3?I-"DJ*2LI.^DZZ2DI+>A
|
||||
MHJ3(I,&DY:2FI,>DP:3GI/.DKJ3LI.NDJZ3BI+>D\Z3*I*2AHPJDLZ2SI,_#
|
||||
8NZ2DN=2AHP`*I+.DLZ3/P[NDI+G4H:,*
|
||||
eofeof
|
||||
|
||||
example['test_data/multi-line.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
MI,JDK*2DI,JDK*2DI,JDK*'!I*2DKJ3GI*:DK*2BI.JDWJ2WI,:AH@"DLZ3L
|
||||
MI/*DO:3.I-ZDWJ2BI,2DJZ2FI,BAHJ3)I*:DXJ3?I-"DJ*2LI.^DZZ2DI+>A
|
||||
MHJ3(I,&DY:2FI,>DP:3GI/.DKJ3LI.NDJZ3BI+>D\Z3*I*2AHPJDLZ2SI,_#
|
||||
8NZ2DN=2AHP`*I+.DLZ3/P[NDI+G4H:,*
|
||||
eofeof
|
||||
|
||||
print "test_data/multi-line ";
|
||||
test('-e',example['test_data/multi-line'],[example['test_data/multi-line.ans']]);
|
||||
# test_data/nkf-19-bug-1
|
||||
|
||||
example['test_data/nkf-19-bug-1'] = <<'eofeof'.unpack('u')[0]
|
||||
,I*:DJZ2D"KK8QJ,*
|
||||
eofeof
|
||||
|
||||
example['test_data/nkf-19-bug-1.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
8&R1")"8D*R0D&RA""ALD0CI81B,;*$(*
|
||||
eofeof
|
||||
|
||||
print "test_data/nkf-19-bug-1 ";
|
||||
test('-Ej',example['test_data/nkf-19-bug-1'],[example['test_data/nkf-19-bug-1.ans']]);
|
||||
# test_data/nkf-19-bug-2
|
||||
|
||||
example['test_data/nkf-19-bug-2'] = <<'eofeof'.unpack('u')[0]
|
||||
%I-NDL@H`
|
||||
eofeof
|
||||
|
||||
example['test_data/nkf-19-bug-2.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
%I-NDL@H`
|
||||
eofeof
|
||||
|
||||
print "test_data/nkf-19-bug-2 ";
|
||||
test('-Ee',example['test_data/nkf-19-bug-2'],[example['test_data/nkf-19-bug-2.ans']]);
|
||||
# test_data/nkf-19-bug-3
|
||||
|
||||
example['test_data/nkf-19-bug-3'] = <<'eofeof'.unpack('u')[0]
|
||||
8[;'Q\,&L"N6ZSN\*\NT)ON7.SL_+"0D*
|
||||
eofeof
|
||||
|
||||
example['test_data/nkf-19-bug-3.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
8[;'Q\,&L"N6ZSN\*\NT)ON7.SL_+"0D*
|
||||
eofeof
|
||||
|
||||
print "test_data/nkf-19-bug-3 ";
|
||||
test('-e',example['test_data/nkf-19-bug-3'],[example['test_data/nkf-19-bug-3.ans']]);
|
||||
# test_data/non-strict-mime
|
||||
|
||||
example['test_data/non-strict-mime'] = <<'eofeof'.unpack('u')[0]
|
||||
M/3])4T\M,C`R,BU*4#]"/PIG<U-#;V]+.6=R-D-O;TQ%9W1Y0W0T1D-$46].
|
||||
M0V\V16=S,D]N;T999S1Y1%=)3$IG=4-0:UD*2W!G<FU#>$E+:6=R,D-V;TMI
|
||||
,9W-30V]O3&,*/ST*
|
||||
eofeof
|
||||
|
||||
example['test_data/non-strict-mime.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1")$8D)"0_)$`D)"1&)%XD.2$C&RA"#0H-"ALD0CMD)$\[?B$Y)6PE.21+
|
||||
<)&(]<20K)#LD1B0D)#\D0"0D)$8D)"1>&RA""@``
|
||||
eofeof
|
||||
|
||||
print "test_data/non-strict-mime ";
|
||||
test('-jTmN',example['test_data/non-strict-mime'],[example['test_data/non-strict-mime.ans']]);
|
||||
# test_data/q-encode-softrap
|
||||
|
||||
example['test_data/q-encode-softrap'] = <<'eofeof'.unpack('u')[0]
|
||||
H/3%")$(T03MZ)3T*,R$\)4DD3CTQ0BA""CTQ0B1"2E$T.3TQ0BA""@``
|
||||
eofeof
|
||||
|
||||
example['test_data/q-encode-softrap.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
>&R1"-$$[>B4S(3PE221.&RA""ALD0DI1-#D;*$(*
|
||||
eofeof
|
||||
|
||||
print "test_data/q-encode-softrap ";
|
||||
test('-jTmQ',example['test_data/q-encode-softrap'],[example['test_data/q-encode-softrap.ans']]);
|
||||
# test_data/rot13
|
||||
|
||||
example['test_data/rot13'] = <<'eofeof'.unpack('u')[0]
|
||||
MI+.D\Z3+I,&DSZ&BS:W"]*3(I*2DI*3>I+FAHPH*;FMF('9E<BXQ+CDR(*3R
|
||||
MS?C-T:2UI+NDQJ2DI+^DP*2DI,:DI*3>I+FDK*&B05-#24D@I,O"T*2WI,8@
|
||||
M4D]4,3,@I*P*P+6DMZ2OQK"DI*3&I*2DRJ2DI.BDIJ3'H:*PRK*\I,ZDZ*2F
|
||||
MI,O*T;2YI+6D[*3>I+ND\Z&C"@HE(&5C:&\@)VAO9V4G('P@;FMF("UR"FAO
|
||||
#9V4*
|
||||
eofeof
|
||||
|
||||
example['test_data/rot13.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
M&R1"4V)31%-Z4W!3?E!1?%QQ15-W4U-34U,O4VA04ALH0@H*87AS(&ER92XQ
|
||||
M+CDR(!LD0E-#?$E\(E-D4VI3=5-34VY3;U-34W534U,O4VA36U!1&RA"3D90
|
||||
M5E8@&R1"4WIQ(5-F4W4;*$(@14)',3,@&R1"4UL;*$(*&R1";V139E->=5]3
|
||||
M4U-U4U-3>5-34SE355-V4%%?>6%K4WU3.5-54WIY(F-H4V13/5,O4VI31%!2
|
||||
A&RA""@HE(')P=6(@)W5B='(G('P@87AS("UE"G5B='(*
|
||||
eofeof
|
||||
|
||||
print "test_data/rot13 ";
|
||||
test('-jr',example['test_data/rot13'],[example['test_data/rot13.ans']]);
|
||||
# test_data/slash
|
||||
|
||||
example['test_data/slash'] = <<'eofeof'.unpack('u')[0]
|
||||
7("`]/U8\5"U5.5=%2RTK.U<U32LE+PH`
|
||||
eofeof
|
||||
|
||||
example['test_data/slash.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
7("`]/U8\5"U5.5=%2RTK.U<U32LE+PH`
|
||||
eofeof
|
||||
|
||||
print "test_data/slash ";
|
||||
test(' ',example['test_data/slash'],[example['test_data/slash.ans']]);
|
||||
# test_data/z1space-0
|
||||
|
||||
example['test_data/z1space-0'] = <<'eofeof'.unpack('u')[0]
|
||||
"H:$`
|
||||
eofeof
|
||||
|
||||
example['test_data/z1space-0.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
"H:$`
|
||||
eofeof
|
||||
|
||||
print "test_data/z1space-0 ";
|
||||
test('-e -Z',example['test_data/z1space-0'],[example['test_data/z1space-0.ans']]);
|
||||
# test_data/z1space-1
|
||||
|
||||
example['test_data/z1space-1'] = <<'eofeof'.unpack('u')[0]
|
||||
"H:$`
|
||||
eofeof
|
||||
|
||||
example['test_data/z1space-1.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
!(```
|
||||
eofeof
|
||||
|
||||
print "test_data/z1space-1 ";
|
||||
test('-e -Z1',example['test_data/z1space-1'],[example['test_data/z1space-1.ans']]);
|
||||
# test_data/z1space-2
|
||||
|
||||
example['test_data/z1space-2'] = <<'eofeof'.unpack('u')[0]
|
||||
"H:$`
|
||||
eofeof
|
||||
|
||||
example['test_data/z1space-2.ans'] = <<'eofeof'.unpack('u')[0]
|
||||
"("``
|
||||
eofeof
|
||||
|
||||
print "test_data/z1space-2 ";
|
||||
test('-e -Z2',example['test_data/z1space-2'],[example['test_data/z1space-2.ans']]);
|
||||
|
||||
# end
|
||||
|
|
Загрузка…
Ссылка в новой задаче