1998-01-16 15:13:05 +03:00
|
|
|
# jcode.rb - Ruby code to handle Japanese (EUC/SJIS) strings
|
|
|
|
|
|
|
|
# Remember the current warning level in $vsave and turn warnings off while
# the String methods below are redefined.  The `false` keyword is used
# instead of the legacy FALSE constant, which newer Ruby versions no longer
# define.
$vsave, $VERBOSE = $VERBOSE, false
|
|
|
|
# Reopens String to add character-oriented helpers (jlength, jcount,
# mbchar?) and to replace succ, upto, tr, delete, squeeze, tr_s and chop
# with regexp-based versions, so that they operate on whatever the regexp
# engine treats as one character under the current $KCODE.
class String
  # NOTE(review): $VERBOSE is set to false by the statement that precedes
  # this class, so this message looks unreachable -- confirm intent.
  printf STDERR, "feel free for some warnings:\n" if $VERBOSE

  # Returns the number of characters obtained by splitting the receiver on
  # the empty pattern.
  def jlength
    self.split(//).length
  end

  # Keep the built-in succ reachable; the replacement below falls back to it.
  alias original_succ succ
  private :original_succ

  # Tests whether the receiver contains a two-byte sequence that lies in the
  # double-byte ranges selected by the first letter of $KCODE ("s"/"S" or
  # "e"/"E").  Returns the match position (truthy) or nil for those two
  # settings, and false for any other $KCODE.
  def mbchar?
    case $KCODE[0]
    when ?s, ?S
      self =~ /[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]/n
    when ?e, ?E
      self =~ /[\xa1-\xfe][\xa1-\xfe]/n
    else
      false
    end
  end

  # Successor of the receiver.  When the last two bytes form a multibyte
  # character the final byte is incremented on a copy; if that leaves the
  # pair outside the multibyte ranges it is incremented once more.  In every
  # other case the built-in succ is used.
  # Assumes self[-1] yields an integer byte value (required by `+= 1`).
  def succ
    if self[-2] and self[-2, 2].mbchar?
      s = self.dup
      s[-1] += 1
      s[-1] += 1 unless s[-2, 2].mbchar?
      return s
    else
      original_succ
    end
  end

  # Yields successive strings from the receiver up to +to+ and returns nil.
  # Nothing is yielded when the receiver sorts after +to+.
  #
  # If the last two bytes of the receiver are a single character, only the
  # final byte is varied, and only when everything before it equals the
  # corresponding part of +to+.  Otherwise succ is applied repeatedly until
  # +to+ is reached or the current string grows longer than +to+.
  def upto(to)
    return if self > to

    curr = self
    tail = self[-2..-1]
    # tail is nil for receivers shorter than two bytes; those must take the
    # plain succ-based branch instead of raising on nil.length.
    if tail and tail.length == 2 and tail =~ /^.$/ then
      if self[0..-2] == to[0..-2]
        first = self[-2].chr
        for c in self[-1] .. to[-1]
          if (first+c.chr).mbchar?
            yield self[0..-2]+c.chr
          end
        end
      end
    else
      loop do
        yield curr
        return if curr == to
        curr = curr.succ
        return if curr.length > to.length
      end
    end
    return nil
  end

  private

  # Expands a tr-style character list into an array of single characters.
  # "x-y" becomes every character from x up to y; a range whose endpoints
  # differ in byte length is skipped; anything else is taken literally.
  def _expand_ch(str)
    chars = []
    str.scan(/(.|\n)-(.|\n)|(.|\n)/) do |lo, hi, single|
      if single
        chars.push single
      elsif lo.length != hi.length
        next
      elsif lo.length == 1
        lo[0].upto(hi[0]) { |c| chars.push c.chr }
      else
        lo.upto(hi) { |c| chars.push c }
      end
    end
    chars
  end

  # Builds the translation table for tr!/tr_s!: each expanded character of
  # +from+ maps to the character at the same index of the expanded +to+, or
  # to the last character of +to+ when +to+ is shorter.
  def expand_ch_hash(from, to)
    h = {}
    afrom = _expand_ch(from)
    ato = _expand_ch(to)
    afrom.each_with_index do |x, i| h[x] = ato[i] || ato[-1] end
    h
  end

  # Cached translation table for a (from, to) pair.  The cache is two-level
  # (by +from+, then by +to+) so that different pairs can never share an
  # entry, which a single concatenated string key could not guarantee.
  def _tr_table(from, to)
    per_from = (HashCache[from] ||= {})
    per_from[to] ||= expand_ch_hash(from, to)
  end

  # Prepares a tr-style character list for interpolation between the
  # brackets of a regexp character class: backslashes and square brackets
  # are escaped so they stand for themselves.  "^" and "-" are passed
  # through because they carry the same meaning in both notations.
  def bsquote(str)
    str.gsub(/[\\\[\]]/) { |ch| "\\" + ch }
  end

  # Per-argument caches so each pattern/table is built only once.
  HashCache = {}
  TrPatternCache = {}
  DeletePatternCache = {}
  SqueezePatternCache = {}
  # tr_s! needs a different pattern than squeeze!, hence its own cache.
  TrSPatternCache = {}

  public

  # Translates, in place, every character listed in +from+ to the
  # corresponding character of +to+.  A +from+ starting with "^" replaces
  # every character NOT listed with the last character of +to+.  An empty
  # +to+ deletes the characters instead.
  # Returns the receiver, or nil when nothing was changed.
  def tr!(from, to)
    # An empty set selects nothing (and "[]" is not a valid regexp).
    return nil if from.length == 0
    return self.delete!(from) if to.length == 0

    pattern = TrPatternCache[from] ||= /[#{bsquote(from)}]/
    if from[0] == ?^
      last = /.$/.match(to)[0]
      self.gsub!(pattern, last)
    else
      h = _tr_table(from, to)
      self.gsub!(pattern) do |c| h[c] end
    end
  end

  # Non-destructive tr!: returns a translated copy.
  def tr(from, to)
    (str = self.dup).tr!(from, to) or str
  end

  # Removes, in place, every character listed in +del+.
  # Returns the receiver, or nil when nothing was removed.
  def delete!(del)
    # An empty set selects nothing (and "[]" is not a valid regexp).
    return nil if del.length == 0
    self.gsub!(DeletePatternCache[del] ||= /[#{bsquote(del)}]+/, '')
  end

  # Non-destructive delete!: returns a copy with the characters removed.
  def delete(del)
    (str = self.dup).delete!(del) or str
  end

  # Collapses, in place, runs of a repeated character into one occurrence.
  # With +del+ only the listed characters are squeezed; without it every
  # character is.  Returns the receiver, or nil when nothing was changed.
  def squeeze!(del=nil)
    pattern =
      if del
        # An empty set selects nothing (and "[]" is not a valid regexp).
        return nil if del.length == 0
        SqueezePatternCache[del] ||= /([#{bsquote(del)}])\1+/
      else
        /(.|\n)\1+/
      end
    self.gsub!(pattern, '\1')
  end

  # Non-destructive squeeze!: returns a squeezed copy.
  def squeeze(del=nil)
    (str = self.dup).squeeze!(del) or str
  end

  # Like tr!, but each run of one repeated selected character is replaced by
  # a single translated character.
  # Returns the receiver, or nil when nothing was changed.
  def tr_s!(from, to)
    # An empty set selects nothing (and "[]" is not a valid regexp).
    return nil if from.length == 0
    return self.delete!(from) if to.length == 0

    # "\1*" rather than "\1+": a character that occurs only once must still
    # be translated, not just runs of two or more.
    pattern = TrSPatternCache[from] ||= /([#{bsquote(from)}])\1*/
    if from[0] == ?^
      last = /.$/.match(to)[0]
      self.gsub!(pattern, last)
    else
      h = _tr_table(from, to)
      self.gsub!(pattern) do h[$1] end
    end
  end

  # Non-destructive tr_s!: returns a translated and squeezed copy.
  def tr_s(from, to)
    (str = self.dup).tr_s!(from,to) or str
  end

  # Removes, in place, the final character; a trailing "\r\n" or "\n" is
  # removed as one unit.  Returns the receiver, or nil for an empty string.
  def chop!
    self.gsub!(/(?:.|\r?\n)\z/, '')
  end

  # Non-destructive chop!: returns a copy without its final character.
  def chop
    (str = self.dup).chop! or str
  end

  # Counts the characters of the receiver that are selected by the tr-style
  # list +str+.  A list of two or more characters starting with "^" counts
  # the characters that are NOT listed.  An empty list counts nothing.
  def jcount(str)
    return 0 if str.length == 0
    if str.length > 1 and str[0] == ?^
      # Already negated: drop the listed characters and count what is left,
      # instead of stacking a second "^" in front of the list.
      self.delete(str[1..-1]).jlength
    else
      self.delete("^#{str}").jlength
    end
  end
end
|
|
|
|
# Put back the warning level that was saved in $vsave before the String
# class was reopened.
$VERBOSE = $vsave
|