зеркало из https://github.com/github/ruby.git
Extract nkf
This commit is contained in:
Родитель
34315510d3
Коммит
df70faa9c9
181
ext/nkf/depend
181
ext/nkf/depend
|
@ -1,181 +0,0 @@
|
|||
# BSD make needs "nkf.o: nkf.c" dependency BEFORE "nkf.o: nkf-utf8/nkf.c".
|
||||
# It seems BSD make searches the target for implicit rule in dependencies at first.
|
||||
nkf.o: nkf.c
|
||||
|
||||
# AUTOGENERATED DEPENDENCIES START
|
||||
nkf.o: $(RUBY_EXTCONF_H)
|
||||
nkf.o: $(arch_hdrdir)/ruby/config.h
|
||||
nkf.o: $(hdrdir)/ruby/assert.h
|
||||
nkf.o: $(hdrdir)/ruby/backward.h
|
||||
nkf.o: $(hdrdir)/ruby/backward/2/assume.h
|
||||
nkf.o: $(hdrdir)/ruby/backward/2/attributes.h
|
||||
nkf.o: $(hdrdir)/ruby/backward/2/bool.h
|
||||
nkf.o: $(hdrdir)/ruby/backward/2/inttypes.h
|
||||
nkf.o: $(hdrdir)/ruby/backward/2/limits.h
|
||||
nkf.o: $(hdrdir)/ruby/backward/2/long_long.h
|
||||
nkf.o: $(hdrdir)/ruby/backward/2/stdalign.h
|
||||
nkf.o: $(hdrdir)/ruby/backward/2/stdarg.h
|
||||
nkf.o: $(hdrdir)/ruby/defines.h
|
||||
nkf.o: $(hdrdir)/ruby/encoding.h
|
||||
nkf.o: $(hdrdir)/ruby/intern.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/abi.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/anyargs.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/char.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/double.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/int.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/long.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/short.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/assume.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/alloc_size.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/artificial.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/cold.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/const.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/constexpr.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/deprecated.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/error.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/flag_enum.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/forceinline.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/format.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/noalias.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/nodiscard.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/noexcept.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/noinline.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/nonnull.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/noreturn.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/packed_struct.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/pure.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/restrict.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/warning.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/attr/weakref.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/cast.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/compiler_is.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/compiler_is/apple.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/compiler_is/clang.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/compiler_is/intel.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/compiler_since.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/config.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/constant_p.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rarray.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rbasic.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rbignum.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rclass.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rdata.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rfile.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rhash.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/robject.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rregexp.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rstring.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rstruct.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/core/rtypeddata.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/ctype.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/dllexport.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/dosish.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/coderange.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/ctype.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/encoding.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/pathname.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/re.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/sprintf.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/string.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/symbol.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/encoding/transcode.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/error.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/eval.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/event.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/fl_type.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/gc.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/glob.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/globals.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/has/attribute.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/has/builtin.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/has/c_attribute.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/has/extension.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/has/feature.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/has/warning.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/array.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/bignum.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/class.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/compar.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/complex.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/cont.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/dir.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/enum.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/enumerator.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/error.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/eval.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/file.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/hash.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/io.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/load.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/marshal.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/numeric.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/object.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/parse.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/proc.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/process.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/random.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/range.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/rational.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/re.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/ruby.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/select.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/select/largesize.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/signal.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/sprintf.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/string.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/struct.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/thread.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/time.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/variable.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/intern/vm.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/interpreter.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/iterator.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/memory.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/method.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/module.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/newobj.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/scan_args.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/special_consts.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/static_assert.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/stdalign.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/stdbool.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/symbol.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/value.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/value_type.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/variable.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/warning_push.h
|
||||
nkf.o: $(hdrdir)/ruby/internal/xmalloc.h
|
||||
nkf.o: $(hdrdir)/ruby/missing.h
|
||||
nkf.o: $(hdrdir)/ruby/onigmo.h
|
||||
nkf.o: $(hdrdir)/ruby/oniguruma.h
|
||||
nkf.o: $(hdrdir)/ruby/ruby.h
|
||||
nkf.o: $(hdrdir)/ruby/st.h
|
||||
nkf.o: $(hdrdir)/ruby/subst.h
|
||||
nkf.o: nkf-utf8/config.h
|
||||
nkf.o: nkf-utf8/nkf.c
|
||||
nkf.o: nkf-utf8/nkf.h
|
||||
nkf.o: nkf-utf8/utf8tbl.c
|
||||
nkf.o: nkf-utf8/utf8tbl.h
|
||||
nkf.o: nkf.c
|
||||
# AUTOGENERATED DEPENDENCIES END
|
|
@ -1,3 +0,0 @@
|
|||
# frozen_string_literal: false
|
||||
require 'mkmf'
|
||||
# Generate the Makefile for the "nkf" extension; no extra header or library
# probes are performed before generation.
create_makefile 'nkf'
|
|
@ -1,283 +0,0 @@
|
|||
# frozen_string_literal: false
|
||||
#
|
||||
# kconv.rb - Kanji Converter.
|
||||
#
|
||||
# $Id$
|
||||
#
|
||||
# ----
|
||||
#
|
||||
# kconv.rb implements the Kconv class for Kanji Converter. Additionally,
|
||||
# some methods in String classes are added to allow easy conversion.
|
||||
#
|
||||
|
||||
require 'nkf'
|
||||
|
||||
#
|
||||
# Kanji Converter for Ruby.
|
||||
#
|
||||
module Kconv
  #
  # Public Constants
  #

  # Encoding constants (re-exported from NKF)

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  #
  # Public Methods
  #

  # Every method defined below is both a module function (Kconv.xxx) and a
  # private instance method for classes that include Kconv.
  module_function

  # call-seq:
  #    Kconv.kconv(str, to_enc, from_enc=nil)
  #
  # Converts <code>str</code> to <code>to_enc</code> by delegating to NKF.nkf.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of
  # Kconv or Encoding objects. Each one that is truthy is stringified and
  # passed as an <code>--oc=</code> / <code>--ic=</code> option respectively.
  def kconv(str, to_enc, from_enc=nil)
    nkf_flags = []
    nkf_flags << " --ic=#{from_enc}" if from_enc
    nkf_flags << " --oc=#{to_enc}" if to_enc

    ::NKF.nkf(nkf_flags.join, str)
  end

  #
  # Encode to
  #

  # call-seq:
  #    Kconv.tojis(str)   => string
  #
  # Returns <code>str</code> converted to ISO-2022-JP.
  def tojis(str)
    kconv(str, JIS)
  end

  # call-seq:
  #    Kconv.toeuc(str)   => string
  #
  # Returns <code>str</code> converted to EUC-JP.
  def toeuc(str)
    kconv(str, EUC)
  end

  # call-seq:
  #    Kconv.tosjis(str)   => string
  #
  # Returns <code>str</code> converted to Shift_JIS.
  def tosjis(str)
    kconv(str, SJIS)
  end

  # call-seq:
  #    Kconv.toutf8(str)   => string
  #
  # Returns <code>str</code> converted to UTF-8.
  def toutf8(str)
    kconv(str, UTF8)
  end

  # call-seq:
  #    Kconv.toutf16(str)   => string
  #
  # Returns <code>str</code> converted to UTF-16.
  def toutf16(str)
    kconv(str, UTF16)
  end

  # call-seq:
  #    Kconv.toutf32(str)   => string
  #
  # Returns <code>str</code> converted to UTF-32.
  def toutf32(str)
    kconv(str, UTF32)
  end

  # call-seq:
  #    Kconv.tolocale(str)   => string
  #
  # Returns <code>str</code> converted to the locale encoding, as named by
  # Encoding.locale_charmap.
  def tolocale(str)
    kconv(str, Encoding.locale_charmap)
  end

  #
  # guess
  #

  # call-seq:
  #    Kconv.guess(str)   => encoding
  #
  # Guesses the encoding of <code>str</code> by delegating to NKF.guess.
  def guess(str)
    ::NKF.guess(str)
  end

  #
  # isEncoding
  #

  # call-seq:
  #    Kconv.iseuc(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> are valid as EUC-JP.
  # <code>str</code> itself is left untouched; the check runs on a copy.
  #
  # *Note* the result is a plain boolean, never a MatchData.
  def iseuc(str)
    candidate = str.dup
    candidate.force_encoding(EUC)
    candidate.valid_encoding?
  end

  # call-seq:
  #    Kconv.issjis(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> are valid as Shift_JIS.
  # <code>str</code> itself is left untouched; the check runs on a copy.
  def issjis(str)
    candidate = str.dup
    candidate.force_encoding(SJIS)
    candidate.valid_encoding?
  end

  # call-seq:
  #    Kconv.isjis(str)   => true or false
  #
  # Returns whether <code>str</code> looks like ISO-2022-JP: plain ASCII text
  # optionally interleaved with escape-sequence-introduced segments, each run
  # of which must return to ASCII via <code>ESC ( B</code>.
  # The match is performed bytewise on a binary copy of <code>str</code>.
  def isjis(str)
    /\A [\t\n\r\x20-\x7E]*
      (?:
        (?:\x1b \x28 I [\x21-\x7E]*
          |\x1b \x28 J [\x21-\x7E]*
          |\x1b \x24 @ (?:[\x21-\x7E]{2})*
          |\x1b \x24 B (?:[\x21-\x7E]{2})*
          |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
        )*
        \x1b \x28 B [\t\n\r\x20-\x7E]*
      )*
     \z/nox.match?(str.dup.force_encoding('BINARY'))
  end

  # call-seq:
  #    Kconv.isutf8(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> are valid as UTF-8.
  # <code>str</code> itself is left untouched; the check runs on a copy.
  def isutf8(str)
    candidate = str.dup
    candidate.force_encoding(UTF8)
    candidate.valid_encoding?
  end
end
|
||||
|
||||
class String
  # call-seq:
  #    String#kconv(to_enc, from_enc)
  #
  # Converts <code>self</code> to <code>to_enc</code> via Kconv.kconv.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of
  # Kconv or Encoding objects. When <code>from_enc</code> is omitted, the
  # receiver's own encoding is used as the source encoding, unless that
  # encoding is binary (ASCII-8BIT), in which case no source encoding is passed.
  def kconv(to_enc, from_enc=nil)
    from_enc ||= encoding unless encoding == Encoding::BINARY
    Kconv.kconv(self, to_enc, from_enc)
  end

  #
  # to Encoding
  #

  # call-seq:
  #    String#tojis   => string
  #
  # Returns <code>self</code> converted to ISO-2022-JP (see Kconv.tojis).
  def tojis
    Kconv.tojis(self)
  end

  # call-seq:
  #    String#toeuc   => string
  #
  # Returns <code>self</code> converted to EUC-JP (see Kconv.toeuc).
  def toeuc
    Kconv.toeuc(self)
  end

  # call-seq:
  #    String#tosjis   => string
  #
  # Returns <code>self</code> converted to Shift_JIS (see Kconv.tosjis).
  def tosjis
    Kconv.tosjis(self)
  end

  # call-seq:
  #    String#toutf8   => string
  #
  # Returns <code>self</code> converted to UTF-8 (see Kconv.toutf8).
  def toutf8
    Kconv.toutf8(self)
  end

  # call-seq:
  #    String#toutf16   => string
  #
  # Returns <code>self</code> converted to UTF-16 (see Kconv.toutf16).
  def toutf16
    Kconv.toutf16(self)
  end

  # call-seq:
  #    String#toutf32   => string
  #
  # Returns <code>self</code> converted to UTF-32 (see Kconv.toutf32).
  def toutf32
    Kconv.toutf32(self)
  end

  # call-seq:
  #    String#tolocale   => string
  #
  # Returns <code>self</code> converted to the locale encoding
  # (see Kconv.tolocale).
  def tolocale
    Kconv.tolocale(self)
  end

  #
  # is Encoding
  #

  # call-seq:
  #    String#iseuc   => true or false
  #
  # Returns the result of Kconv.iseuc for <code>self</code>.
  def iseuc
    Kconv.iseuc(self)
  end

  # call-seq:
  #    String#issjis   => true or false
  #
  # Returns the result of Kconv.issjis for <code>self</code>.
  def issjis
    Kconv.issjis(self)
  end

  # call-seq:
  #    String#isjis   => true or false
  #
  # Returns the result of Kconv.isjis for <code>self</code>.
  def isjis
    Kconv.isjis(self)
  end

  # call-seq:
  #    String#isutf8   => true or false
  #
  # Returns the result of Kconv.isutf8 for <code>self</code>.
  def isutf8
    Kconv.isutf8(self)
  end
end
|
|
@ -1,51 +0,0 @@
|
|||
#ifndef _CONFIG_H_
|
||||
#define _CONFIG_H_
|
||||
|
||||
/* UTF8 input and output */
|
||||
#define UTF8_INPUT_ENABLE
|
||||
#define UTF8_OUTPUT_ENABLE
|
||||
|
||||
/* invert characters invalid in Shift_JIS to CP932 */
|
||||
#define SHIFTJIS_CP932
|
||||
|
||||
/* fix input encoding when given by option */
|
||||
#define INPUT_CODE_FIX
|
||||
|
||||
/* --overwrite option */
|
||||
/* by Satoru Takabayashi <ccsatoru@vega.aichi-u.ac.jp> */
|
||||
#define OVERWRITE
|
||||
|
||||
/* --cap-input, --url-input option */
|
||||
#define INPUT_OPTION
|
||||
|
||||
/* --numchar-input option */
|
||||
#define NUMCHAR_OPTION
|
||||
|
||||
/* --debug, --no-output option */
|
||||
#define CHECK_OPTION
|
||||
|
||||
/* JIS X0212 */
|
||||
#define X0212_ENABLE
|
||||
|
||||
/* --exec-in, --exec-out option
|
||||
* require pipe, fork, execvp and so on.
|
||||
* please undef this on MS-DOS, MinGW
|
||||
* this is still buggy around child process
|
||||
*/
|
||||
/* #define EXEC_IO */
|
||||
|
||||
/* Unicode Normalization */
|
||||
#define UNICODE_NORMALIZATION
|
||||
|
||||
/*
|
||||
* Select Default Output Encoding
|
||||
*
|
||||
*/
|
||||
|
||||
/* #define DEFAULT_CODE_JIS */
|
||||
/* #define DEFAULT_CODE_SJIS */
|
||||
/* #define DEFAULT_CODE_WINDOWS_31J */
|
||||
/* #define DEFAULT_CODE_EUC */
|
||||
/* #define DEFAULT_CODE_UTF8 */
|
||||
|
||||
#endif /* _CONFIG_H_ */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,189 +0,0 @@
|
|||
/*
|
||||
*
|
||||
* nkf.h - Header file for nkf
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NKF_H
|
||||
#define NKF_H
|
||||
|
||||
/* Wrapper of configurations */
|
||||
|
||||
#ifndef MIME_DECODE_DEFAULT
|
||||
#define MIME_DECODE_DEFAULT STRICT_MIME
|
||||
#endif
|
||||
#ifndef X0201_DEFAULT
|
||||
#define X0201_DEFAULT TRUE
|
||||
#endif
|
||||
|
||||
#if defined(DEFAULT_NEWLINE) && DEFAULT_NEWLINE == 0x0D0A
|
||||
#elif defined(DEFAULT_NEWLINE) && DEFAULT_NEWLINE == 0x0D
|
||||
#else
|
||||
#define DEFAULT_NEWLINE 0x0A
|
||||
#endif
|
||||
#ifdef HELP_OUTPUT_STDERR
|
||||
#define HELP_OUTPUT stderr
|
||||
#else
|
||||
#define HELP_OUTPUT stdout
|
||||
#endif
|
||||
|
||||
|
||||
/* Compatibility definitions */
|
||||
|
||||
#ifdef nkf_char
|
||||
#elif defined(INT_IS_SHORT)
|
||||
typedef long nkf_char;
|
||||
#define NKF_INT32_C(n) (n##L)
|
||||
#else
|
||||
typedef int nkf_char;
|
||||
#define NKF_INT32_C(n) (n)
|
||||
#endif
|
||||
|
||||
#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || (defined(__WATCOMC__) && defined(__386__) && !defined(__LINUX__)) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
|
||||
#define MSDOS
|
||||
#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
|
||||
#define __WIN32__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERL_XS
|
||||
#undef OVERWRITE
|
||||
#endif
|
||||
|
||||
#ifndef PERL_XS
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(MSDOS) || defined(__OS2__)
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#if defined(_MSC_VER) || defined(__WATCOMC__)
|
||||
#define mktemp _mktemp
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MSDOS
|
||||
#ifdef LSI_C
|
||||
#define setbinmode(fp) fsetbin(fp)
|
||||
#elif defined(__DJGPP__)
|
||||
#include <libc/dosio.h>
|
||||
void setbinmode(FILE *fp)
{
    /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
    /* Instead, clear O_TEXT and set O_BINARY directly in the handle's mode flags. */
    const int fd = fileno(fp);
    const int binary_mode = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;

    __file_handle_set(fd, binary_mode);
}
|
||||
#else /* Microsoft C, Turbo C */
|
||||
#define setbinmode(fp) setmode(fileno(fp), O_BINARY)
|
||||
#endif
|
||||
#else /* UNIX */
|
||||
#define setbinmode(fp) (void)(fp)
|
||||
#endif
|
||||
|
||||
#ifdef _IOFBF /* SysV and MSDOS, Windows */
|
||||
#define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
|
||||
#else /* BSD */
|
||||
#define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
|
||||
#endif
|
||||
|
||||
/*Borland C++ 4.5 EasyWin*/
|
||||
#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
|
||||
#define EASYWIN
|
||||
#ifndef __WIN16__
|
||||
#define __WIN16__
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef OVERWRITE
|
||||
/* added by satoru@isoternet.org */
|
||||
#if defined(__EMX__)
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <sys/stat.h>
|
||||
#if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
|
||||
#include <unistd.h>
|
||||
#if defined(__WATCOMC__)
|
||||
#include <sys/utime.h>
|
||||
#else
|
||||
#include <utime.h>
|
||||
#endif
|
||||
#else /* defined(MSDOS) */
|
||||
#ifdef __WIN32__
|
||||
#ifdef __BORLANDC__ /* BCC32 */
|
||||
#include <utime.h>
|
||||
#else /* !defined(__BORLANDC__) */
|
||||
#include <sys/utime.h>
|
||||
#endif /* (__BORLANDC__) */
|
||||
#else /* !defined(__WIN32__) */
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__) /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
|
||||
#include <sys/utime.h>
|
||||
#elif defined(__TURBOC__) /* BCC */
|
||||
#include <utime.h>
|
||||
#elif defined(LSI_C) /* LSI C */
|
||||
#endif /* (__WIN32__) */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(DEFAULT_CODE_JIS) && !defined(DEFAULT_CODE_SJIS) && \
|
||||
!defined(DEFAULT_CODE_WINDOWS_31J) && !defined(DEFAULT_CODE_EUC) && \
|
||||
!defined(DEFAULT_CODE_UTF8) && !defined(DEFAULT_CODE_LOCALE)
|
||||
#define DEFAULT_CODE_LOCALE
|
||||
#endif
|
||||
|
||||
#ifdef DEFAULT_CODE_LOCALE
|
||||
|
||||
#if defined(__WIN32__) /* not win32 should be posix */
|
||||
# ifndef HAVE_LOCALE_H
|
||||
# define HAVE_LOCALE_H
|
||||
# endif
|
||||
#elif defined(__OS2__)
|
||||
# undef HAVE_LANGINFO_H /* We do not use kLIBC's langinfo. */
|
||||
# ifndef HAVE_LOCALE_H
|
||||
# define HAVE_LOCALE_H
|
||||
# endif
|
||||
#elif defined(MSDOS)
|
||||
# ifndef HAVE_LOCALE_H
|
||||
# define HAVE_LOCALE_H
|
||||
# endif
|
||||
#elif defined(__BIONIC__) /* bionic doesn't have locale */
|
||||
#else
|
||||
# ifndef HAVE_LANGINFO_H
|
||||
# define HAVE_LANGINFO_H
|
||||
# endif
|
||||
# ifndef HAVE_LOCALE_H
|
||||
# define HAVE_LOCALE_H
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LANGINFO_H
|
||||
#include <langinfo.h>
|
||||
#endif
|
||||
#ifdef HAVE_LOCALE_H
|
||||
#include <locale.h>
|
||||
#endif
|
||||
|
||||
#endif /* DEFAULT_CODE_LOCALE */
|
||||
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
|
||||
#ifndef ARG_UNUSED
|
||||
#if defined(__GNUC__)
|
||||
# define ARG_UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
# define ARG_UNUSED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef WIN32DLL
|
||||
#include "nkf32.h"
|
||||
#endif
|
||||
|
||||
#endif /* NKF_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,72 +0,0 @@
|
|||
/*
|
||||
* utf8tbl.h - Header file for Conversion Table
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _UTF8TBL_H_
|
||||
#define _UTF8TBL_H_
|
||||
|
||||
#ifdef UTF8_OUTPUT_ENABLE
|
||||
#define sizeof_euc_to_utf8_1byte 94
|
||||
#define sizeof_euc_to_utf8_2bytes 94
|
||||
extern const unsigned short euc_to_utf8_1byte[];
|
||||
extern const unsigned short *const euc_to_utf8_2bytes[];
|
||||
extern const unsigned short *const euc_to_utf8_2bytes_ms[];
|
||||
extern const unsigned short *const euc_to_utf8_2bytes_mac[];
|
||||
extern const unsigned short *const euc_to_utf8_2bytes_x0213[];
|
||||
extern const unsigned short *const x0212_to_utf8_2bytes[];
|
||||
extern const unsigned short *const x0212_to_utf8_2bytes_x0213[];
|
||||
#define sizeof_x0213_combining_chars 5
|
||||
#define sizeof_x0213_combining_table 25
|
||||
#define sizeof_x0213_1_surrogate_table 26
|
||||
#define sizeof_x0213_2_surrogate_table 277
|
||||
extern const unsigned short x0213_combining_chars[sizeof_x0213_combining_chars];
|
||||
extern const unsigned short x0213_combining_table[sizeof_x0213_combining_table][3];
|
||||
extern const unsigned short x0213_1_surrogate_table[sizeof_x0213_1_surrogate_table][3];
|
||||
extern const unsigned short x0213_2_surrogate_table[sizeof_x0213_2_surrogate_table][3];
|
||||
#endif /* UTF8_OUTPUT_ENABLE */
|
||||
|
||||
#ifdef UTF8_INPUT_ENABLE
|
||||
#define sizeof_utf8_to_euc_C2 64
|
||||
#define sizeof_utf8_to_euc_E5B8 64
|
||||
#define sizeof_utf8_to_euc_2bytes 112
|
||||
#define sizeof_utf8_to_euc_3bytes 16
|
||||
extern const unsigned short *const utf8_to_euc_2bytes[];
|
||||
extern const unsigned short *const utf8_to_euc_2bytes_ms[];
|
||||
extern const unsigned short *const utf8_to_euc_2bytes_932[];
|
||||
extern const unsigned short *const utf8_to_euc_2bytes_mac[];
|
||||
extern const unsigned short *const utf8_to_euc_2bytes_x0213[];
|
||||
extern const unsigned short *const *const utf8_to_euc_3bytes[];
|
||||
extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
|
||||
extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
|
||||
extern const unsigned short *const *const utf8_to_euc_3bytes_mac[];
|
||||
extern const unsigned short *const *const utf8_to_euc_3bytes_x0213[];
|
||||
#endif /* UTF8_INPUT_ENABLE */
|
||||
|
||||
#ifdef UNICODE_NORMALIZATION
|
||||
|
||||
#define NORMALIZATION_TABLE_LENGTH 942
|
||||
#define NORMALIZATION_TABLE_NFC_LENGTH 3
|
||||
#define NORMALIZATION_TABLE_NFD_LENGTH 9
|
||||
/*
 * One entry of normalization_table: two fixed-size byte arrays.
 * NOTE(review): presumably `nfc` holds the composed (NFC) form and `nfd` the
 * decomposed (NFD) form of the same character as UTF-8 bytes -- confirm
 * against the table definition in utf8tbl.c.
 */
struct normalization_pair {
|
||||
const unsigned char nfc[NORMALIZATION_TABLE_NFC_LENGTH];
|
||||
const unsigned char nfd[NORMALIZATION_TABLE_NFD_LENGTH];
|
||||
};
|
||||
extern const struct normalization_pair normalization_table[];
|
||||
#endif
|
||||
|
||||
#ifdef SHIFTJIS_CP932
|
||||
#define CP932_TABLE_BEGIN 0xFA
|
||||
#define CP932_TABLE_END 0xFC
|
||||
extern const unsigned short shiftjis_cp932[3][189];
|
||||
#define CP932INV_TABLE_BEGIN 0xED
|
||||
#define CP932INV_TABLE_END 0xEE
|
||||
extern const unsigned short cp932inv[2][189];
|
||||
#endif /* SHIFTJIS_CP932 */
|
||||
|
||||
#ifdef X0212_ENABLE
|
||||
extern const unsigned short shiftjis_x0212[3][189];
|
||||
extern const unsigned short *const x0212_shiftjis[];
|
||||
#endif /* X0212_ENABLE */
|
||||
|
||||
#endif
|
506
ext/nkf/nkf.c
506
ext/nkf/nkf.c
|
@ -1,506 +0,0 @@
|
|||
/*
|
||||
* NKF - Ruby extension for Network Kanji Filter
|
||||
*
|
||||
* original nkf2.x is maintained at http://sourceforge.jp/projects/nkf/
|
||||
*
|
||||
* $Id$
|
||||
*
|
||||
*/
|
||||
|
||||
#define RUBY_NKF_REVISION "$Revision$"
|
||||
#define RUBY_NKF_VERSION NKF_VERSION " (" NKF_RELEASE_DATE ")"
|
||||
#define NKF_GEM_VERSION "0.2.0"
|
||||
|
||||
#include "ruby/ruby.h"
|
||||
#include "ruby/encoding.h"
|
||||
|
||||
/* Replace nkf's getchar/putchar for variable modification */
|
||||
/* we never use getc, ungetc */
|
||||
|
||||
#undef getc
|
||||
#undef ungetc
|
||||
#define getc(f) (input_ctr>=i_len?-1:input[input_ctr++])
|
||||
#define ungetc(c,f) input_ctr--
|
||||
|
||||
#define INCSIZE 32
|
||||
#undef putchar
|
||||
#undef TRUE
|
||||
#undef FALSE
|
||||
#define putchar(c) rb_nkf_putchar(c)
|
||||
|
||||
/* Input/Output pointers */
|
||||
|
||||
static unsigned char *output;
|
||||
static unsigned char *input;
|
||||
static int input_ctr;
|
||||
static int i_len;
|
||||
static int output_ctr;
|
||||
static int o_len;
|
||||
static int incsize;
|
||||
|
||||
static VALUE result;
|
||||
|
||||
static int
|
||||
rb_nkf_putchar(unsigned int c)
|
||||
{
|
||||
if (output_ctr >= o_len) {
|
||||
o_len += incsize;
|
||||
rb_str_resize(result, o_len);
|
||||
incsize *= 2;
|
||||
output = (unsigned char *)RSTRING_PTR(result);
|
||||
}
|
||||
output[output_ctr++] = c;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/* Include kanji filter main part */
|
||||
/* getchar and putchar will be replaced during inclusion */
|
||||
|
||||
#define PERL_XS 1
|
||||
#include "nkf-utf8/config.h"
|
||||
#include "nkf-utf8/utf8tbl.c"
|
||||
#include "nkf-utf8/nkf.c"
|
||||
|
||||
rb_encoding* rb_nkf_enc_get(const char *name)
|
||||
{
|
||||
int idx = rb_enc_find_index(name);
|
||||
if (idx < 0) {
|
||||
nkf_encoding *nkf_enc = nkf_enc_find(name);
|
||||
idx = rb_enc_find_index(nkf_enc_name(nkf_enc_to_base_encoding(nkf_enc)));
|
||||
if (idx < 0) {
|
||||
idx = rb_define_dummy_encoding(name);
|
||||
}
|
||||
}
|
||||
return rb_enc_from_index(idx);
|
||||
}
|
||||
|
||||
int nkf_split_options(const char *arg)
|
||||
{
|
||||
int count = 0;
|
||||
unsigned char option[256];
|
||||
int i = 0, j = 0;
|
||||
int is_escaped = FALSE;
|
||||
int is_single_quoted = FALSE;
|
||||
int is_double_quoted = FALSE;
|
||||
for(i = 0; arg[i]; i++){
|
||||
if(j == 255){
|
||||
return -1;
|
||||
}else if(is_single_quoted){
|
||||
if(arg[i] == '\''){
|
||||
is_single_quoted = FALSE;
|
||||
}else{
|
||||
option[j++] = arg[i];
|
||||
}
|
||||
}else if(is_escaped){
|
||||
is_escaped = FALSE;
|
||||
option[j++] = arg[i];
|
||||
}else if(arg[i] == '\\'){
|
||||
is_escaped = TRUE;
|
||||
}else if(is_double_quoted){
|
||||
if(arg[i] == '"'){
|
||||
is_double_quoted = FALSE;
|
||||
}else{
|
||||
option[j++] = arg[i];
|
||||
}
|
||||
}else if(arg[i] == '\''){
|
||||
is_single_quoted = TRUE;
|
||||
}else if(arg[i] == '"'){
|
||||
is_double_quoted = TRUE;
|
||||
}else if(arg[i] == ' '){
|
||||
option[j] = '\0';
|
||||
options(option);
|
||||
j = 0;
|
||||
}else{
|
||||
option[j++] = arg[i];
|
||||
}
|
||||
}
|
||||
if(j){
|
||||
option[j] = '\0';
|
||||
options(option);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* NKF.nkf(opt, str) => string
|
||||
*
|
||||
* Convert _str_ and return converted result.
|
||||
* Conversion details are specified by _opt_ as String.
|
||||
*
|
||||
* require 'nkf'
|
||||
* output = NKF.nkf("-s", input)
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
|
||||
{
|
||||
VALUE tmp;
|
||||
reinit();
|
||||
nkf_split_options(StringValueCStr(opt));
|
||||
if (!output_encoding) rb_raise(rb_eArgError, "no output encoding given");
|
||||
|
||||
switch (nkf_enc_to_index(output_encoding)) {
|
||||
case UTF_8_BOM: output_encoding = nkf_enc_from_index(UTF_8); break;
|
||||
case UTF_16BE_BOM: output_encoding = nkf_enc_from_index(UTF_16BE); break;
|
||||
case UTF_16LE_BOM: output_encoding = nkf_enc_from_index(UTF_16LE); break;
|
||||
case UTF_32BE_BOM: output_encoding = nkf_enc_from_index(UTF_32BE); break;
|
||||
case UTF_32LE_BOM: output_encoding = nkf_enc_from_index(UTF_32LE); break;
|
||||
}
|
||||
output_bom_f = FALSE;
|
||||
|
||||
incsize = INCSIZE;
|
||||
|
||||
input_ctr = 0;
|
||||
input = (unsigned char *)StringValuePtr(src);
|
||||
i_len = RSTRING_LENINT(src);
|
||||
tmp = rb_str_new(0, i_len*3 + 10);
|
||||
|
||||
output_ctr = 0;
|
||||
output = (unsigned char *)RSTRING_PTR(tmp);
|
||||
o_len = RSTRING_LENINT(tmp);
|
||||
*output = '\0';
|
||||
|
||||
/* use _result_ begin*/
|
||||
result = tmp;
|
||||
kanji_convert(NULL);
|
||||
result = Qnil;
|
||||
/* use _result_ end */
|
||||
|
||||
rb_str_set_len(tmp, output_ctr);
|
||||
|
||||
if (mimeout_f)
|
||||
rb_enc_associate(tmp, rb_usascii_encoding());
|
||||
else
|
||||
rb_enc_associate(tmp, rb_nkf_enc_get(nkf_enc_name(output_encoding)));
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* NKF.guess(str) => encoding
|
||||
*
|
||||
* Returns guessed encoding of _str_ by nkf routine.
|
||||
*
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_nkf_guess(VALUE obj, VALUE src)
|
||||
{
|
||||
reinit();
|
||||
|
||||
input_ctr = 0;
|
||||
input = (unsigned char *)StringValuePtr(src);
|
||||
i_len = RSTRING_LENINT(src);
|
||||
|
||||
guess_f = TRUE;
|
||||
kanji_convert( NULL );
|
||||
guess_f = FALSE;
|
||||
|
||||
return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code()));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* NKF - Ruby extension for Network Kanji Filter
|
||||
*
|
||||
* == Description
|
||||
*
|
||||
* This is a Ruby Extension version of nkf (Network Kanji Filter).
|
||||
* It converts the second argument and returns the converted result. Conversion
|
||||
* details are specified by flags as the first argument.
|
||||
*
|
||||
* *Nkf* is yet another kanji code converter among networks, hosts and terminals.
|
||||
* It converts input kanji code to designated kanji code
|
||||
* such as ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8 or UTF-16.
|
||||
*
|
||||
* One of the most distinctive features of *nkf* is that it guesses the input kanji encoding.
|
||||
* It currently recognizes ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8 and UTF-16.
|
||||
* So users needn't set the input kanji code explicitly.
|
||||
*
|
||||
* By default, X0201 kana is converted into X0208 kana.
|
||||
* For X0201 kana, SO/SI, SSO and ESC-(-I methods are supported.
|
||||
* For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
|
||||
* To accept X0201 in Shift_JIS, use <b>-X</b>, <b>-x</b> or <b>-S</b>.
|
||||
*
|
||||
* == Flags
|
||||
*
|
||||
* === -b -u
|
||||
*
|
||||
* Output is buffered (DEFAULT), Output is unbuffered.
|
||||
*
|
||||
* === -j -s -e -w -w16 -w32
|
||||
*
|
||||
* Output code is ISO-2022-JP (7bit JIS), Shift_JIS, EUC-JP,
|
||||
* UTF-8N, UTF-16BE, UTF-32BE.
|
||||
* Without this option and compile option, ISO-2022-JP is assumed.
|
||||
*
|
||||
* === -J -S -E -W -W16 -W32
|
||||
*
|
||||
* Input assumption is JIS 7 bit, Shift_JIS, EUC-JP,
|
||||
* UTF-8, UTF-16, UTF-32.
|
||||
*
|
||||
* ==== -J
|
||||
*
|
||||
* Assume JIS input. It also accepts EUC-JP.
|
||||
* This is the default. This flag does not exclude Shift_JIS.
|
||||
*
|
||||
* ==== -S
|
||||
*
|
||||
* Assume Shift_JIS and X0201 kana input. It also accepts JIS.
|
||||
* EUC-JP is recognized as X0201 kana. Without <b>-x</b> flag,
|
||||
* X0201 kana (halfwidth kana) is converted into X0208.
|
||||
*
|
||||
* ==== -E
|
||||
*
|
||||
* Assume EUC-JP input. It also accepts JIS.
|
||||
* Same as -J.
|
||||
*
|
||||
* === -t
|
||||
*
|
||||
* No conversion.
|
||||
*
|
||||
* === -i_
|
||||
*
|
||||
* Output sequence to designate JIS-kanji. (DEFAULT B)
|
||||
*
|
||||
* === -o_
|
||||
*
|
||||
* Output sequence to designate ASCII. (DEFAULT B)
|
||||
*
|
||||
* === -r
|
||||
*
|
||||
* {de/en}crypt ROT13/47
|
||||
*
|
||||
* === \-h[123] --hiragana --katakana --katakana-hiragana
|
||||
*
|
||||
* [-h1 --hiragana] Katakana to Hiragana conversion.
|
||||
*
|
||||
* [-h2 --katakana] Hiragana to Katakana conversion.
|
||||
*
|
||||
* [-h3 --katakana-hiragana] Katakana to Hiragana and Hiragana to Katakana conversion.
|
||||
*
|
||||
* === -T
|
||||
*
|
||||
* Text mode output (MS-DOS)
|
||||
*
|
||||
* === -l
|
||||
*
|
||||
* ISO8859-1 (Latin-1) support
|
||||
*
|
||||
* === -f[<code>m</code> [- <code>n</code>]]
|
||||
*
|
||||
* Folding on <code>m</code> length with <code>n</code> margin in a line.
|
||||
* Without this option, fold length is 60 and fold margin is 10.
|
||||
*
|
||||
* === -F
|
||||
*
|
||||
* New line preserving line folding.
|
||||
*
|
||||
* === \-Z[0-3]
|
||||
*
|
||||
* Convert X0208 alphabet (Fullwidth Alphabets) to ASCII.
|
||||
*
|
||||
* [-Z -Z0] Convert X0208 alphabet to ASCII.
|
||||
*
|
||||
* [-Z1] Converts X0208 kankaku to single ASCII space.
|
||||
*
|
||||
* [-Z2] Converts X0208 kankaku to double ASCII spaces.
|
||||
*
|
||||
* [-Z3] Replaces fullwidth >, <, ", & with '&gt;', '&lt;', '&quot;', '&amp;' as in HTML.
|
||||
*
|
||||
* === -X -x
|
||||
*
|
||||
* Assume X0201 kana in MS-Kanji.
|
||||
* With <b>-X</b> or without this option, X0201 is converted into X0208 Kana.
|
||||
* With <b>-x</b>, try to preserve X0208 kana and do not convert X0201 kana to X0208.
|
||||
* In JIS output, ESC-(-I is used. In EUC output, SSO is used.
|
||||
*
|
||||
* === \-B[0-2]
|
||||
*
|
||||
* Assume broken JIS-Kanji input, which lost ESC.
|
||||
* Useful when your site is using old B-News Nihongo patch.
|
||||
*
|
||||
* [-B1] allows any char after ESC-( or ESC-$.
|
||||
*
|
||||
* [-B2] forces ASCII after NL.
|
||||
*
|
||||
* === -I
|
||||
*
|
||||
* Replacing non iso-2022-jp char into a geta character
|
||||
* (substitute character in Japanese).
|
||||
*
|
||||
* === -d -c
|
||||
*
|
||||
* Delete \r in line feed, Add \r in line feed.
|
||||
*
|
||||
* === \-m[BQN0]
|
||||
*
|
||||
* MIME ISO-2022-JP/ISO8859-1 decode. (DEFAULT)
|
||||
* To see ISO8859-1 (Latin-1) -l is necessary.
|
||||
*
|
||||
* [-mB] Decode MIME base64 encoded stream. Remove header or other part before
|
||||
* conversion.
|
||||
*
|
||||
* [-mQ] Decode MIME quoted stream. '_' in quoted stream is converted to space.
|
||||
*
|
||||
* [-mN] Non-strict decoding.
|
||||
* It allows line break in the middle of the base64 encoding.
|
||||
*
|
||||
* [-m0] No MIME decode.
|
||||
*
|
||||
* === -M
|
||||
*
|
||||
* MIME encode. Header style. All ASCII code and control characters are intact.
|
||||
* Kanji conversion is performed before encoding, so this cannot be used as a picture encoder.
|
||||
*
|
||||
* [-MB] MIME encode Base64 stream.
|
||||
*
|
||||
* [-MQ] Perform quoted encoding.
|
||||
*
|
||||
* === -l
|
||||
*
|
||||
* Input and output code is ISO8859-1 (Latin-1) and ISO-2022-JP.
|
||||
* <b>-s</b>, <b>-e</b> and <b>-x</b> are not compatible with this option.
|
||||
*
|
||||
* === \-L[uwm]
|
||||
*
|
||||
* new line mode
|
||||
* Without this option, nkf doesn't convert line breaks.
|
||||
*
|
||||
* [-Lu] unix (LF)
|
||||
*
|
||||
* [-Lw] windows (CRLF)
|
||||
*
|
||||
* [-Lm] mac (CR)
|
||||
*
|
||||
* === --fj --unix --mac --msdos --windows
|
||||
*
|
||||
* Convert for these systems.
|
||||
*
|
||||
* === --jis --euc --sjis --mime --base64
|
||||
*
|
||||
* convert for named code
|
||||
*
|
||||
* === --jis-input --euc-input --sjis-input --mime-input --base64-input
|
||||
*
|
||||
* assume input system
|
||||
*
|
||||
* === --ic=<code>input codeset</code> --oc=<code>output codeset</code>
|
||||
*
|
||||
* Set the input or output codeset.
|
||||
* NKF supports the following codesets; codeset names are case insensitive.
|
||||
*
|
||||
* [ISO-2022-JP] a.k.a. RFC1468, 7bit JIS, JUNET
|
||||
*
|
||||
* [EUC-JP (eucJP-nkf)] a.k.a. AT&T JIS, Japanese EUC, UJIS
|
||||
*
|
||||
* [eucJP-ascii] a.k.a. x-eucjp-open-19970715-ascii
|
||||
*
|
||||
* [eucJP-ms] a.k.a. x-eucjp-open-19970715-ms
|
||||
*
|
||||
* [CP51932] Microsoft Version of EUC-JP.
|
||||
*
|
||||
* [Shift_JIS] SJIS, MS-Kanji
|
||||
*
|
||||
* [Windows-31J] a.k.a. CP932
|
||||
*
|
||||
* [UTF-8] same as UTF-8N
|
||||
*
|
||||
* [UTF-8N] UTF-8 without BOM
|
||||
*
|
||||
* [UTF-8-BOM] UTF-8 with BOM
|
||||
*
|
||||
* [UTF-16] same as UTF-16BE
|
||||
*
|
||||
* [UTF-16BE] UTF-16 Big Endian without BOM
|
||||
*
|
||||
* [UTF-16BE-BOM] UTF-16 Big Endian with BOM
|
||||
*
|
||||
* [UTF-16LE] UTF-16 Little Endian without BOM
|
||||
*
|
||||
* [UTF-16LE-BOM] UTF-16 Little Endian with BOM
|
||||
*
|
||||
* [UTF-32] same as UTF-32BE
|
||||
*
|
||||
* [UTF-32BE] UTF-32 Big Endian without BOM
|
||||
*
|
||||
* [UTF-32BE-BOM] UTF-32 Big Endian with BOM
|
||||
*
|
||||
* [UTF-32LE] UTF-32 Little Endian without BOM
|
||||
*
|
||||
* [UTF-32LE-BOM] UTF-32 Little Endian with BOM
|
||||
*
|
||||
* [UTF8-MAC] NFDed UTF-8, a.k.a. UTF8-NFD (input only)
|
||||
*
|
||||
* === --fb-{skip, html, xml, perl, java, subchar}
|
||||
*
|
||||
* Specify the way that nkf handles unassigned characters.
|
||||
* Without this option, --fb-skip is assumed.
|
||||
*
|
||||
* === --prefix= <code>escape character</code> <code>target character</code> ..
|
||||
*
|
||||
* When nkf converts to Shift_JIS,
|
||||
* nkf adds a specified escape character to specified 2nd byte of Shift_JIS characters.
|
||||
* 1st byte of argument is the escape character and following bytes are target characters.
|
||||
*
|
||||
* === --no-cp932ext
|
||||
*
|
||||
* Handle the characters extended in CP932 as unassigned characters.
|
||||
*
|
||||
* === --no-best-fit-chars
|
||||
*
|
||||
* When Unicode to Encoded byte conversion,
|
||||
* don't convert characters which are not round-trip safe.
|
||||
* When Unicode to Unicode conversion,
|
||||
* with this and -x option, nkf can be used as UTF converter.
|
||||
* (In other words, without this and -x option, nkf doesn't save some characters)
|
||||
*
|
||||
* When nkf converts a string that relates to a path, you should use this option.
|
||||
*
|
||||
* === --cap-input
|
||||
*
|
||||
* Decode hex encoded characters.
|
||||
*
|
||||
* === --url-input
|
||||
*
|
||||
* Unescape percent escaped characters.
|
||||
*
|
||||
* === --
|
||||
*
|
||||
* Ignore rest of -option.
|
||||
*/
|
||||
|
||||
void
|
||||
Init_nkf(void)
|
||||
{
|
||||
VALUE mNKF = rb_define_module("NKF");
|
||||
|
||||
rb_define_module_function(mNKF, "nkf", rb_nkf_convert, 2);
|
||||
rb_define_module_function(mNKF, "guess", rb_nkf_guess, 1);
|
||||
rb_define_alias(rb_singleton_class(mNKF), "guess", "guess");
|
||||
|
||||
rb_define_const(mNKF, "AUTO", Qnil);
|
||||
rb_define_const(mNKF, "NOCONV", Qnil);
|
||||
rb_define_const(mNKF, "UNKNOWN", Qnil);
|
||||
rb_define_const(mNKF, "BINARY", rb_enc_from_encoding(rb_nkf_enc_get("BINARY")));
|
||||
rb_define_const(mNKF, "ASCII", rb_enc_from_encoding(rb_nkf_enc_get("US-ASCII")));
|
||||
rb_define_const(mNKF, "JIS", rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP")));
|
||||
rb_define_const(mNKF, "EUC", rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP")));
|
||||
rb_define_const(mNKF, "SJIS", rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS")));
|
||||
rb_define_const(mNKF, "UTF8", rb_enc_from_encoding(rb_utf8_encoding()));
|
||||
rb_define_const(mNKF, "UTF16", rb_enc_from_encoding(rb_nkf_enc_get("UTF-16BE")));
|
||||
rb_define_const(mNKF, "UTF32", rb_enc_from_encoding(rb_nkf_enc_get("UTF-32BE")));
|
||||
|
||||
/* Full version string of nkf */
|
||||
rb_define_const(mNKF, "VERSION", rb_str_new2(RUBY_NKF_VERSION));
|
||||
/* Version of nkf */
|
||||
rb_define_const(mNKF, "NKF_VERSION", rb_str_new2(NKF_VERSION));
|
||||
/* Release date of nkf */
|
||||
rb_define_const(mNKF, "NKF_RELEASE_DATE", rb_str_new2(NKF_RELEASE_DATE));
|
||||
/* Version of nkf library */
|
||||
rb_define_const(mNKF, "GEM_VERSION", rb_str_new_cstr(NKF_GEM_VERSION));
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
# Read the gem version out of nkf.c, looked up first next to this gemspec and
# then under ext/nkf/.  The version is the quoted value that follows
# "#define NKF_GEM_VERSION".  Stays nil when neither file exists.
source_version = nil
["", "ext/nkf/"].each do |dir|
  begin
    source_version = File.open(File.join(__dir__, "#{dir}nkf.c")) do |f|
      # Consume everything up to and including the macro name ...
      f.gets("\n#define NKF_GEM_VERSION ")
      # ... so the next read starts at the quoted version string.
      f.gets[/\s*"(.+)"/, 1]
    end
    break
  rescue Errno::ENOENT
    next
  end
end

Gem::Specification.new do |spec|
  spec.name          = "nkf"
  spec.version       = source_version
  spec.authors       = ["NARUSE Yui", "Charles Oliver Nutter"]
  spec.email         = ["naruse@airemix.jp", "headius@headius.com"]

  spec.summary       = %q{Ruby extension for Network Kanji Filter}
  spec.description   = %q{Ruby extension for Network Kanji Filter}
  spec.homepage      = "https://github.com/ruby/nkf"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
  spec.licenses      = ["Ruby", "BSD-2-Clause"]

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Ship every file tracked by git, except tests/specs/features.
  gemspec_dir = File.expand_path('..', __FILE__)
  spec.files = Dir.chdir(gemspec_dir) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |f| f.match(%r{^(test|spec|features)/}) }
  end

  # Java builds ship the prebuilt jar; everything else compiles the C extension.
  java_platform = Gem::Platform === spec.platform && spec.platform =~ 'java'
  if java_platform || RUBY_ENGINE == 'jruby'
    spec.platform = 'java'
    spec.licenses += ["EPL-2.0", "LGPL-2.1"]
    spec.files += Dir["lib/nkf.jar"]
  else
    spec.extensions = ["ext/nkf/extconf.rb"]
  end

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |f| File.basename(f) }
  spec.require_paths = ["lib"]
end
|
|
@ -30,3 +30,4 @@ abbrev 0.1.2 https://github.com/ruby/abbrev
|
|||
resolv-replace 0.1.1 https://github.com/ruby/resolv-replace
|
||||
rinda 0.2.0 https://github.com/ruby/rinda
|
||||
drb 2.2.0 https://github.com/ruby/drb
|
||||
nkf 0.2.0 https://github.com/ruby/nkf
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
# Load the NKF implementation that matches the running Ruby engine:
# the jar plus its JRuby extension class on JRuby, the native extension otherwise.
case RUBY_ENGINE
when "jruby"
  require 'nkf.jar'
  JRuby::Util.load_ext('org.jruby.ext.nkf.NKFLibrary')
else
  require 'nkf.so'
end
|
|
@ -1,82 +0,0 @@
|
|||
# frozen_string_literal: false
|
||||
require 'test/unit'
|
||||
require 'kconv'
|
||||
|
||||
class TestKconv < Test::Unit::TestCase
  # Sample text tagged as EUC-JP, UTF-8, Shift_JIS and ISO-2022-JP; the tests
  # below expect every sample to convert into each of the others.
  def setup
    @euc_str = "\
\xa5\xaa\xa5\xd6\xa5\xb8\xa5\xa7\xa5\xaf\xa5\xc8\xbb\xd8\xb8\xfe\
\xa5\xd7\xa5\xed\xa5\xb0\xa5\xe9\xa5\xdf\xa5\xf3\xa5\xb0\xb8\xc0\xb8\xec\
\x52\x75\x62\x79".force_encoding('EUC-JP')
    @utf8_str = "\
\xe3\x82\xaa\xe3\x83\x96\xe3\x82\xb8\xe3\x82\xa7\
\xe3\x82\xaf\xe3\x83\x88\xe6\x8c\x87\xe5\x90\x91\
\xe3\x83\x97\xe3\x83\xad\xe3\x82\xb0\xe3\x83\xa9\xe3\x83\x9f\
\xe3\x83\xb3\xe3\x82\xb0\xe8\xa8\x80\xe8\xaa\x9e\
\x52\x75\x62\x79".force_encoding('UTF-8')
    @sjis_str = "\
\x83\x49\x83\x75\x83\x57\x83\x46\x83\x4e\x83\x67\x8e\x77\x8c\xfc\
\x83\x76\x83\x8d\x83\x4f\x83\x89\x83\x7e\x83\x93\x83\x4f\x8c\xbe\x8c\xea\
\x52\x75\x62\x79".force_encoding('Shift_JIS')
    @jis_str = "\
\x1b\x24\x42\x25\x2a\x25\x56\x25\x38\x25\x27\x25\x2f\x25\x48\x3b\x58\x38\x7e\
\x25\x57\x25\x6d\x25\x30\x25\x69\x25\x5f\x25\x73\x25\x30\x38\x40\x38\x6c\x1b\x28\x42\
\x52\x75\x62\x79".force_encoding('ISO-2022-JP')
  end

  # Everything must convert to the EUC-JP sample, via #toeuc and via #kconv.
  def test_eucjp
    assert(@euc_str.iseuc)
    assert_equal(::Kconv::EUC, Kconv.guess(@euc_str))
    each_sample { |sample| assert_equal(@euc_str, sample.toeuc) }
    each_sample { |sample| assert_equal(@euc_str, sample.kconv(::NKF::EUC)) }
  end

  # Everything must convert to the Shift_JIS sample, via #tosjis and via #kconv.
  def test_shiftjis
    assert(@sjis_str.issjis)
    assert_equal(::Kconv::SJIS, Kconv.guess(@sjis_str))
    each_sample { |sample| assert_equal(@sjis_str, sample.tosjis) }
    each_sample { |sample| assert_equal(@sjis_str, sample.kconv(::NKF::SJIS)) }
  end

  # Everything must convert to the UTF-8 sample, via #toutf8 and via #kconv.
  def test_utf8
    assert(@utf8_str.isutf8)
    assert_equal(::Kconv::UTF8, Kconv.guess(@utf8_str))
    each_sample { |sample| assert_equal(@utf8_str, sample.toutf8) }
    each_sample { |sample| assert_equal(@utf8_str, sample.kconv(::NKF::UTF8)) }
  end

  # Everything must convert to the ISO-2022-JP sample, via #tojis and via #kconv.
  def test_jis
    assert_equal(::Kconv::JIS, Kconv.guess(@jis_str))
    each_sample { |sample| assert_equal(@jis_str, sample.tojis) }
    each_sample { |sample| assert_equal(@jis_str, sample.kconv(::NKF::JIS)) }
  end

  # Converting a string to the encoding it is already tagged with must leave it unchanged.
  def test_kconv
    raw = "\xc2\xa1"
    ["UTF-8", "EUC-JP"].each do |enc|
      tagged = raw.dup.force_encoding(enc)
      assert_equal(tagged, tagged.kconv(enc))
    end
  end

  private

  # Yields the four samples in the fixed order EUC-JP, Shift_JIS, UTF-8, ISO-2022-JP.
  def each_sample
    [@euc_str, @sjis_str, @utf8_str, @jis_str].each { |sample| yield sample }
  end
end
|
|
@ -1,23 +0,0 @@
|
|||
# frozen_string_literal: false
|
||||
require 'test/unit'
|
||||
require 'nkf'
|
||||
|
||||
class TestNKF < Test::Unit::TestCase
  # Sample byte string that test_guess expects NKF.guess to report as EUC.
  EUC_STR = "\xa5\xaa\xa5\xd6\xa5\xb8\xa5\xa7\xa5\xaf\xa5\xc8\xbb\xd8\xb8\xfe\
\xa5\xb9\xa5\xaf\xa5\xea\xa5\xd7\xa5\xc8\xb8\xc0\xb8\xec\
Ruby"

  # NKF.guess must recognise both the EUC sample and its "-j" (JIS) conversion.
  def test_guess
    jis = NKF.nkf('-j', EUC_STR)
    assert_equal(::NKF::JIS, NKF.guess(jis))
    assert_equal(::NKF::EUC, NKF.guess(EUC_STR))
  end

  # Repeated conversions with --oc=eucJP-nkf must not raise.
  def test_ruby_dev_36909
    assert_nothing_raised do
      100.times do
        NKF.nkf("--oc=eucJP-nkf", "foo")
      end
    end
  end
end
|
Загрузка…
Ссылка в новой задаче