2008-08-05 16:32:13 +04:00
|
|
|
require 'optparse'
|
|
|
|
require 'erb'
|
2008-08-10 06:45:18 +04:00
|
|
|
require 'fileutils'
|
2008-08-05 16:32:13 +04:00
|
|
|
|
2008-09-01 21:06:07 +04:00
|
|
|
# Number of word_array elements ActionMap#generate_lookup_node appends for
# each lookup node: one entry naming the offsets table and one naming the
# infos table.
NUM_ELEM_BYTELOOKUP = 2
|
|
|
|
|
2008-08-05 16:32:13 +04:00
|
|
|
# Replacement table used when emitting C string literals.  The three
# characters below get their conventional short escapes; every other
# control byte and every byte from 0x7f upward gets a three-digit octal
# escape.
C_ESC = {
  "\\" => "\\\\",
  '"' => '\"',
  "\n" => '\n',
}

# Control bytes: keep a short escape when one was registered above.
(0x00..0x1f).each do |code|
  key = [code].pack("C")
  C_ESC[key] = format("\\%03o", code) unless C_ESC[key]
end

# DEL and all high bytes are always written in octal.
(0x7f..0xff).each do |code|
  C_ESC[[code].pack("C")] = format("\\%03o", code)
end

# Matches any single character that has an entry in C_ESC.
C_ESC_PAT = Regexp.union(*C_ESC.keys)

# Returns +str+ wrapped in double quotes with every character listed in
# C_ESC replaced by its escape sequence.
def c_esc(str)
  escaped = str.gsub(C_ESC_PAT) {|matched| C_ESC[matched] }
  '"' + escaped + '"'
end
|
|
|
|
|
|
|
|
# A set of byte strings.
#
# Internally the set is an array of alternative sequences; each sequence is
# an array of "byte sets", and each byte set is an array of Integer ranges.
# For example "e0{a0-bf}" is stored as [[[0xe0..0xe0], [0xa0..0xbf]]].
class StrSet
  # Parses a textual pattern into a StrSet.
  #
  # The pattern is a whitespace-separated list of sequences.  A sequence is
  # a run of two-digit hex bytes ("e0") and/or brace sets ("{00-7f,a0}")
  # whose members are single hex bytes or "lo-hi" hex ranges.  Hex digits
  # may be upper or lower case everywhere.
  #
  # Raises ArgumentError when the pattern does not follow this grammar.
  def self.parse(pattern)
    if /\A\s*(([0-9a-f][0-9a-f]|\{([0-9a-f][0-9a-f]|[0-9a-f][0-9a-f]-[0-9a-f][0-9a-f])(,([0-9a-f][0-9a-f]|[0-9a-f][0-9a-f]-[0-9a-f][0-9a-f]))*\})+(\s+|\z))*\z/i !~ pattern
      raise ArgumentError, "invalid pattern: #{pattern.inspect}"
    end
    result = []
    pattern.scan(/\S+/) {|seq|
      seq_result = []
      while !seq.empty?
        if /\A([0-9a-f][0-9a-f])/i =~ seq
          byte = $1.to_i(16)
          seq_result << [byte..byte]
          seq = $'
        elsif /\A\{([^\}]+)\}/ =~ seq
          set = $1
          seq = $'
          set_result = []
          set.scan(/[^,]+/) {|range|
            # These two patterns must be case-insensitive like the
            # validation above; otherwise "{A1-FE}" is accepted by the
            # validator and then rejected here.
            if /\A([0-9a-f][0-9a-f])-([0-9a-f][0-9a-f])\z/i =~ range
              b = $1.to_i(16)
              e = $2.to_i(16)
              set_result << (b..e)
            elsif /\A([0-9a-f][0-9a-f])\z/i =~ range
              byte = $1.to_i(16)
              set_result << (byte..byte)
            else
              raise "invalid range: #{range.inspect}"
            end
          }
          seq_result << set_result
        else
          raise "invalid sequence: #{seq.inspect}"
        end
      end
      result << seq_result
    }
    self.new(result)
  end

  # +pat+ is the nested array representation described on the class.
  def initialize(pat)
    @pat = pat
  end

  # Hash value derived from the pattern so equal sets can be used as Hash
  # keys.
  def hash
    @pat.hash
  end

  # Two StrSets are equal when they have the same class and the same
  # pattern structure.
  def eql?(other)
    self.class == other.class &&
    @pat == other.instance_eval { @pat }
  end

  alias == eql?

  # Renders the set back into pattern syntax (lower-case hex).  The empty
  # set is shown as "(empset)" and an empty sequence as "(empstr)".
  def to_s
    if @pat.empty?
      "(empset)"
    else
      @pat.map {|seq|
        if seq.empty?
          "(empstr)"
        else
          seq.map {|byteset|
            if byteset.length == 1 && byteset[0].begin == byteset[0].end
              # A byte set holding exactly one byte prints without braces.
              "%02x" % byteset[0].begin
            else
              "{" +
              byteset.map {|range|
                if range.begin == range.end
                  "%02x" % range.begin
                else
                  "%02x-%02x" % [range.begin, range.end]
                end
              }.join(',') +
              "}"
            end
          }.join('')
        end
      }.join(' ')
    end
  end

  def inspect
    "\#<#{self.class}: #{self.to_s}>"
  end

  # Length in bytes of the shortest sequence, or nil for the empty set.
  def min_length
    if @pat.empty?
      nil
    else
      @pat.map {|seq| seq.length }.min
    end
  end

  # Length in bytes of the longest sequence, or nil for the empty set.
  def max_length
    if @pat.empty?
      nil
    else
      @pat.map {|seq| seq.length }.max
    end
  end

  # True when the set contains the empty string.
  def emptyable?
    @pat.any? {|seq|
      seq.empty?
    }
  end

  # Sorted list of every byte value that can start a member of the set.
  def first_bytes
    result = {}
    @pat.each {|seq|
      next if seq.empty?
      seq.first.each {|range|
        range.each {|byte|
          result[byte] = true
        }
      }
    }
    result.keys.sort
  end

  # Yields, in ascending byte order, each possible first byte together with
  # a StrSet of the remainders of all sequences that can start with it.
  # Empty sequences are skipped.
  def each_firstbyte
    h = {}
    @pat.each {|seq|
      next if seq.empty?
      seq.first.each {|range|
        range.each {|byte|
          (h[byte] ||= []) << seq[1..-1]
        }
      }
    }
    h.keys.sort.each {|byte|
      yield byte, StrSet.new(h[byte])
    }
  end
end
|
|
|
|
|
|
|
|
# Associates StrSet patterns with actions and emits the C lookup tables
# (byte_array / word_array text) that implement the mapping.
class ActionMap
  # Builds an ActionMap from a hash whose keys are pattern strings in
  # StrSet.parse syntax.
  def self.parse(hash)
    parsed = {}
    hash.each do |pattern, action|
      parsed[StrSet.parse(pattern)] = action
    end
    new(parsed)
  end

  # +h+ maps pattern objects to actions.
  def initialize(h)
    @map = h
  end

  # Order-independent hash: XOR of the hashes of all keys and values.
  def hash
    @map.inject(0) {|digest, (key, value)| digest ^ key.hash ^ value.hash }
  end

  # Equal when both are the same class and hold equal maps.
  def eql?(other)
    return false unless self.class == other.class
    @map == other.instance_variable_get(:@map)
  end

  alias == eql?

  def inspect
    entries = @map.map {|pattern, action| " [#{pattern}]=>#{action.inspect}" }
    "\#<#{self.class}:#{entries.join('')}>"
  end

  # Largest max_length among the patterns.
  def max_input_length
    lengths = @map.keys.map {|pattern| pattern.max_length }
    lengths.max
  end

  # Action of the first pattern that can match the empty string, or nil
  # when there is none.
  def empty_action
    hit = @map.find {|pattern, _action| pattern.emptyable? }
    hit && hit[1]
  end

  # Splits the map by first byte and yields (byte, sub-map, rest-encoding).
  #
  # Without +valid_encoding+ the bytes present in the map are yielded in
  # ascending order with nil as the third value.  With it, the first bytes
  # of +valid_encoding+ drive the iteration; bytes the map does not cover
  # get a sub-map sending the remainder to :undef.
  #
  # Raises when a pattern can match the empty string or when two actions
  # claim the same (byte, remainder) pair.
  def each_firstbyte(valid_encoding=nil)
    by_byte = {}
    @map.each do |pattern, action|
      raise "emptyable pattern" if pattern.emptyable?
      pattern.each_firstbyte do |byte, rest|
        bucket = (by_byte[byte] ||= {})
        previous = bucket[rest]
        if previous
          raise "ambiguous %s or %s (%02X/%s)" % [previous, action, byte, rest]
        end
        bucket[rest] = action
      end
    end

    unless valid_encoding
      by_byte.keys.sort.each do |byte|
        yield byte, ActionMap.new(by_byte[byte]), nil
      end
      return
    end

    valid_encoding.each_firstbyte do |byte, rest|
      if by_byte[byte]
        yield byte, ActionMap.new(by_byte[byte]), rest
      else
        yield byte, ActionMap.new(rest => :undef), nil
      end
    end
  end

  # Already emitted offset tables / info tables, keyed by content, so
  # identical tables are emitted once and shared by name.
  OffsetsMemo = {}
  InfosMemo = {}

  # Text for one offsets table: a "min, max," line followed by the offsets
  # for min..max, sixteen per line in two groups of eight.
  def format_offsets(min, max, offsets)
    code = format("%d, %d,\n", min, max)
    offsets[min..max].each_slice(16) do |row|
      code << " "
      code << row[0, 8].map {|off| "%3d," % off.to_s }.join('')
      if row.length > 8
        code << " "
        code << row[8, 8].map {|off| "%3d," % off.to_s }.join('')
      end
      code << "\n"
    end
    code
  end

  # Converts one action into the C expression written to the infos table.
  # Raises for anything that is not a known symbol, one to four hex bytes,
  # or a "/*BYTE_LOOKUP*/"-prefixed node reference.
  def generate_info(info)
    case info
    when :nomap   then "NOMAP"
    when :undef   then "UNDEF"
    when :invalid then "INVALID"
    when :func_ii then "FUNii"
    when :func_si then "FUNsi"
    when :func_io then "FUNio"
    when :func_so then "FUNso"
    when /\A(?:[0-9a-f][0-9a-f]){1,4}\z/i
      # One to four output bytes become o1(..) .. o4(..).
      bytes = info.to_s.scan(/[0-9a-f][0-9a-f]/i)
      "o#{bytes.length}(#{bytes.map {|b| "0x#{b}" }.join(',')})"
    when /\A\/\*BYTE_LOOKUP\*\// # pointer to BYTE_LOOKUP structure
      info.to_s
    else
      raise "unexpected action: #{info.inspect}"
    end
  end

  # Text for one infos table: right-aligned entries, four per line when
  # every entry is at most 16 characters wide, otherwise two per line.
  def format_infos(infos)
    cells = infos.map {|info| generate_info(info) }
    width = cells.map {|cell| cell.length }.max
    per_line = width <= 16 ? 4 : 2
    code = ""
    cells.each_slice(per_line) do |row|
      code << " "
      row.each {|cell| code << sprintf(" %#{width}s,", cell) }
      code << "\n"
    end
    code
  end

  # Emits the offsets table (into +bytes_code+), the infos table and the
  # node entry (into +words_code+) for a 256-entry +table+ of actions,
  # reusing previously emitted tables with identical content.
  def generate_lookup_node(bytes_code, words_code, name, table)
    offsets = []
    infos = []
    slot_of = {}
    min = max = nil
    table.each_with_index do |action, byte|
      action ||= :invalid
      unless action == :invalid
        min ||= byte
        max = byte
      end
      slot = slot_of[action]
      unless slot
        slot = infos.length
        slot_of[action] = slot
        infos[slot] = action
      end
      offsets[byte] = slot
    end
    min = max = 0 unless min

    offsets_key = [min, max, offsets[min..max]]
    offsets_name = OffsetsMemo[offsets_key]
    unless offsets_name
      offsets_name = "#{name}_offsets"
      OffsetsMemo[offsets_key] = offsets_name
      if bytes_code.empty?
        bytes_code << "static const unsigned char\nbyte_array[0] = {\n};\n"
      end
      # Two header bytes (min, max) plus one byte per offset.
      splice_into_array(bytes_code, 2+max-min+1) do |size|
        "\#define #{offsets_name} #{size}\n" +
          format_offsets(min, max, offsets) + "\n"
      end
    end

    if words_code.empty?
      words_code << "static const uintptr_t\nword_array[0] = {\n};\n"
    end

    infos_name = InfosMemo[infos]
    unless infos_name
      infos_name = "#{name}_infos"
      InfosMemo[infos] = infos_name
      splice_into_array(words_code, infos.length) do |size|
        "\#define #{infos_name} (((uintptr_t)word_array)+sizeof(uintptr_t)*#{size})\n" +
          format_infos(infos) + "\n"
      end
    end

    splice_into_array(words_code, NUM_ELEM_BYTELOOKUP) do |size|
      "\#define #{name} ((uintptr_t)(word_array+#{size}))\n" +
        "#{offsets_name},\n#{infos_name},\n" + "\n"
    end
  end

  # PreMemo: [action map, valid encoding] => node name, checked before any
  # work.  PostMemo: finished 256-entry table => node name.  NextName
  # supplies suffixes for nodes generated without a name hint.
  PreMemo = {}
  PostMemo = {}
  NextName = "a"

  # Recursively generates the lookup node for this map and returns its
  # name.  Nodes are shared through PreMemo/PostMemo.
  def generate_node(bytes_code, words_code, name_hint=nil, valid_encoding=nil)
    memo_key = [self, valid_encoding]
    known = PreMemo[memo_key]
    return known if known

    table = Array.new(0x100, :invalid)
    each_firstbyte(valid_encoding) do |byte, rest, rest_valid_encoding|
      action = rest.empty_action
      if action
        table[byte] = action
      else
        child_hint = name_hint ? "#{name_hint}_#{'%02X' % byte}" : nil
        child = rest.generate_node(bytes_code, words_code, child_hint, rest_valid_encoding)
        table[byte] = "/*BYTE_LOOKUP*/" + child
      end
    end

    known = PostMemo[table]
    return known if known

    unless name_hint
      name_hint = "fun_" + NextName.dup
      NextName.succ!
    end

    PostMemo[table] = name_hint
    PreMemo[memo_key] = name_hint

    generate_lookup_node(bytes_code, words_code, name_hint, table)
    name_hint
  end

  # Entry point used by the generator functions; returns the node name.
  def gennode(bytes_code, words_code, name_hint=nil, valid_encoding=nil)
    generate_node(bytes_code, words_code, name_hint, valid_encoding)
  end

  private

  # Inserts the text returned by the block just before the closing "};"
  # line of the C array in +code+, then rewrites the array's declared size
  # to the old size plus +growth+.  The block receives the old size.
  def splice_into_array(code, growth)
    size = code[/\[\d+\]/][1...-1].to_i
    code.sub!(/^(\};\n\z)/) {|closer| yield(size) + closer }
    size += growth
    code.sub!(/\[\d+\]/) { "[#{size}]" }
  end
end
|
|
|
|
|
2008-08-31 20:23:04 +04:00
|
|
|
# Converts a (csid, index) pair into a multibyte code, returned as a
# lower-case hex string.
#
# csid 0 passes +index+ through, csid 1 adds 0x80, and csid 2 / 3 treat
# +index+ as a two-byte row/column code (row in the high byte, column in
# the low byte) and fold it into a lead/trail byte pair.
#
# Raises RuntimeError ("invalid byte sequence") when the row or column is
# outside the accepted range, and ArgumentError for any other csid.
def citrus_mskanji_cstomb(csid, index)
  case csid
  when 0
    index
  when 1
    index + 0x80
  when 2, 3
    row = index >> 8
    raise "invalid byte sequence" if row < 0x21
    if csid == 3
      if row <= 0x2F
        offset = (row == 0x22 || row >= 0x26) ? 0xED : 0xF0
      elsif row >= 0x4D && row <= 0x7E
        offset = 0xCE
      else
        raise "invalid byte sequence"
      end
    else
      raise "invalid byte sequence" if row > 0x97
      offset = (row < 0x5F) ? 0x81 : 0xC1
    end
    col = index & 0xFF
    raise "invalid byte sequence" if (col < 0x21 || col > 0x7E)

    row -= 0x21
    col -= 0x21
    if (row & 1) == 0
      col += 0x40
      col += 1 if (col >= 0x7F) # 0x7f is never produced as a trail byte
    else
      col += 0x9F
    end
    # Two rows share one lead byte.
    row = row / 2 + offset
    (row << 8) | col
  else
    # Without this branch the case yields nil and the to_s(16) below fails
    # with an unrelated "wrong number of arguments" error.
    raise ArgumentError, "unknown csid: #{csid.inspect}"
  end.to_s(16)
end
|
|
|
|
|
|
|
|
# Converts a (csid, index) pair into a multibyte code by OR-ing in the
# bit pattern selected by +csid+; the result is returned as a lower-case
# hex string.
#
# Raises ArgumentError for a csid other than 0x0000, 0x8080, 0x0080 or
# 0x8000.
def citrus_euc_cstomb(csid, index)
  case csid
  when 0x0000
    index
  when 0x8080
    index | 0x8080
  when 0x0080
    index | 0x8E80
  when 0x8000
    index | 0x8F8080
  else
    # Without this branch the case yields nil and the to_s(16) below fails
    # with an unrelated "wrong number of arguments" error.
    raise ArgumentError, "unknown csid: #{csid.inspect}"
  end.to_s(16)
end
|
|
|
|
|
|
|
|
# Dispatches to the converter for the encoding scheme named by +ces+
# ('mskanji' or 'euc').  Any other name yields nil.
def citrus_cstomb(ces, csid, index)
  if ces == 'mskanji'
    citrus_mskanji_cstomb(csid, index)
  elsif ces == 'euc'
    citrus_euc_cstomb(csid, index)
  end
end
|
|
|
|
|
|
|
|
# Directory names under $srcdir that citrus_decode_mapsrc searches: the
# first entry that is a prefix of the map source name is used as the
# subdirectory.
SUBDIR = %w/APPLE AST BIG5 CNS CP EBCDIC GB GEORGIAN ISO646 ISO-8859 JIS KAZAKH KOI KS MISC TCVN/
|
|
|
|
|
|
|
|
|
|
|
|
# Reads one or more map source files and returns their mappings as an
# array of pairs.
#
# +mapsrcs+ is a comma-separated list of map names.  A name starting with
# 'UCS' is read as a from-UCS map and yields [ucs, encoded] pairs; any
# other name is read as a to-UCS map and yields [encoded, ucs] pairs, where
# "encoded" is the hex string produced by citrus_cstomb(ces, csid, value).
#
# The file for a name is looked up under $srcdir in the SUBDIR entry that
# prefixes the name, with ':' replaced by '@', ".src" appended and the last
# '/' of the path replaced by '%'.  Only the lines between BEGIN_MAP and
# END_MAP are interpreted; blank lines and '#' comments are skipped.
#
# Raises RuntimeError on a line that is not in a recognised notation.
def citrus_decode_mapsrc(ces, csid, mapsrcs)
  table = []
  mapsrcs.split(',').each do |mapsrc|
    path = [$srcdir]
    mode = nil
    if mapsrc.start_with?('UCS')
      mode = :from_ucs
      from = mapsrc[4..-1]
      path << SUBDIR.find{|x| from.start_with?(x) }
    else
      mode = :to_ucs
      path << SUBDIR.find{|x| mapsrc.start_with?(x) }
    end
    path << mapsrc.gsub(':', '@')
    path = File.join(*path)
    path << ".src"
    path[path.rindex('/')] = '%'
    STDERR.puts 'load mapsrc %s' % path if VERBOSE_MODE
    # File.open rather than Kernel#open: the argument is always a plain
    # file path and must never be interpreted as a command.
    File.open(path) do |f|
      # Skip the header up to and including the BEGIN_MAP line.
      f.each_line do |l|
        break if /^BEGIN_MAP/ =~ l
      end
      f.each_line do |l|
        next if /^\s*(?:#|$)/ =~ l
        break if /^END_MAP/ =~ l
        case mode
        when :from_ucs
          case l
          when /0x(\w+)\s*-\s*0x(\w+)\s*=\s*INVALID/
            # Citrus OOB_MODE
          when /(0x\w+)\s*=\s*(0x\w+)/
            table << [$1.hex, citrus_cstomb(ces, csid, $2.hex)]
          else
            raise "unknown notation '%s'"% l
          end
        when :to_ucs
          case l
          when /(0x\w+)\s*=\s*(0x\w+)/
            table << [citrus_cstomb(ces, csid, $1.hex), $2.hex]
          else
            raise "unknown notation '%s'"% l
          end
        end
      end
    end
  end
  return table
end
|
|
|
|
|
2008-08-05 16:32:13 +04:00
|
|
|
# Returns a copy of +map+ as an array of [key, value] pairs in which every
# Integer has been replaced by the upper-case hex string of that code
# point's UTF-8 encoding.  Non-Integer entries are passed through as is.
def encode_utf8(map)
  # integer means UTF-8 encoded sequence.
  to_hex = lambda {|item|
    item.is_a?(Integer) ? [item].pack("U").unpack("H*")[0].upcase : item
  }
  map.map {|k, v| [to_hex.call(k), to_hex.call(v)] }
end
|
|
|
|
|
|
|
|
# Compiles +map+ into lookup nodes appended to the generated-code buffers.
#
# Returns [node name, maximum input length].  When ValidEncoding has an
# entry for +from+ it is parsed and used to constrain node generation.
def transcode_compile_tree(name, from, map)
  first_wins = {}
  encode_utf8(map).each do |k, v|
    first_wins[k] ||= v # use first mapping
  end
  am = ActionMap.parse(first_wins)

  max_input = am.max_input_length

  valid_encoding = ValidEncoding[from] ? StrSet.parse(ValidEncoding[from]) : nil

  defined_name = am.gennode(TRANSCODE_GENERATED_BYTES_CODE, TRANSCODE_GENERATED_WORDS_CODE, name, valid_encoding)
  [defined_name, max_input]
end
|
|
|
|
|
|
|
|
# Names of the rb_transcoder structures generated so far, in order.
TRANSCODERS = Array.new

# Accumulators for the generated C source.  They are appended to in place,
# so they must stay mutable.
TRANSCODE_GENERATED_BYTES_CODE = ""
TRANSCODE_GENERATED_WORDS_CODE = ""
TRANSCODE_GENERATED_TRANSCODER_CODE = ""
|
2008-08-05 16:32:13 +04:00
|
|
|
|
|
|
|
# Generates the lookup tree and the rb_transcoder definition for a
# converter from encoding +from+ to encoding +to+.
#
# The transcoder name is recorded in TRANSCODERS and its definition is
# appended to TRANSCODE_GENERATED_TRANSCODER_CODE.  Always returns an empty
# string.
def transcode_tblgen(from, to, map)
  STDERR.puts "converter from #{from} to #{to}" if VERBOSE_MODE
  # Turn the encoding names into C identifier fragments.
  id_from = from.tr('^0-9A-Za-z', '_')
  id_to = to.tr('^0-9A-Za-z', '_')
  tree_name =
    if from == "UTF-8"
      "to_#{id_to}"
    elsif to == "UTF-8"
      "from_#{id_from}"
    else
      "from_#{id_from}_to_#{id_to}"
    end
  map = encode_utf8(map)
  real_tree_name, max_input = transcode_compile_tree(tree_name, from, map)
  transcoder_name = "rb_#{tree_name}"
  TRANSCODERS << transcoder_name
  input_unit_length = UnitLength[from]
  # String values are hex, two characters per output byte.
  max_output = map.map {|_key, value| value.is_a?(String) ? value.length/2 : 1 }.max
  definition = <<"End"
static const rb_transcoder
#{transcoder_name} = {
#{c_esc from}, #{c_esc to}, #{real_tree_name},
byte_array, word_array, sizeof(uintptr_t),
#{input_unit_length}, /* input_unit_length */
#{max_input}, /* max_input */
#{max_output}, /* max_output */
stateless_converter, /* stateful_type */
NULL, NULL, NULL, NULL,
NULL, NULL, NULL
};
End
  TRANSCODE_GENERATED_TRANSCODER_CODE << definition
  ''
end
|
|
|
|
|
2008-08-08 19:48:17 +04:00
|
|
|
# Generates the lookup node for +am+ into the generated-code buffers.
# The node name is not returned; the result is always an empty string.
def transcode_generate_node(am, name_hint=nil)
  if VERBOSE_MODE
    STDERR.puts "converter for #{name_hint}"
  end
  am.gennode(TRANSCODE_GENERATED_BYTES_CODE, TRANSCODE_GENERATED_WORDS_CODE, name_hint)
  ''
end
|
|
|
|
|
|
|
|
# Returns all generated C code: byte tables, then word tables, then the
# transcoder definitions.
def transcode_generated_code
  tables = TRANSCODE_GENERATED_BYTES_CODE + TRANSCODE_GENERATED_WORDS_CODE
  tables + TRANSCODE_GENERATED_TRANSCODER_CODE
end
|
|
|
|
|
2008-08-05 16:32:13 +04:00
|
|
|
# Returns one rb_register_transcoder(...) call per generated transcoder,
# in generation order.
def transcode_register_code
  TRANSCODERS.inject('') do |code, transcoder_name|
    code << " rb_register_transcoder(&#{transcoder_name});\n"
  end
end
|
|
|
|
|
2008-08-08 19:48:17 +04:00
|
|
|
# Input unit length by source encoding name.  Encodings that are not
# listed default to 1.
UnitLength = Hash.new(1)
UnitLength.update(
  'UTF-16BE' => 2,
  'UTF-16LE' => 2,
  'UTF-32BE' => 4,
  'UTF-32LE' => 4
)
|
|
|
|
|
2008-08-06 15:47:14 +04:00
|
|
|
# Valid byte sequences of each encoding, written in StrSet.parse syntax:
# one alternative per line.
ValidEncoding = {
  '1byte' => '{00-ff}',
  '2byte' => '{00-ff}{00-ff}',
  '4byte' => '{00-ff}{00-ff}{00-ff}{00-ff}',
  'US-ASCII' => '{00-7f}',
  'UTF-8' => [
    '{00-7f}',
    '{c2-df}{80-bf}',
    'e0{a0-bf}{80-bf}',
    '{e1-ec}{80-bf}{80-bf}',
    'ed{80-9f}{80-bf}',
    '{ee-ef}{80-bf}{80-bf}',
    'f0{90-bf}{80-bf}{80-bf}',
    '{f1-f3}{80-bf}{80-bf}{80-bf}',
    'f4{80-8f}{80-bf}{80-bf}',
  ].join("\n"),
  'UTF-16BE' => [
    '{00-d7,e0-ff}{00-ff}',
    '{d8-db}{00-ff}{dc-df}{00-ff}',
  ].join("\n"),
  'UTF-16LE' => [
    '{00-ff}{00-d7,e0-ff}',
    '{00-ff}{d8-db}{00-ff}{dc-df}',
  ].join("\n"),
  'UTF-32BE' => [
    '0000{00-d7,e0-ff}{00-ff}',
    '00{01-10}{00-ff}{00-ff}',
  ].join("\n"),
  'UTF-32LE' => [
    '{00-ff}{00-d7,e0-ff}0000',
    '{00-ff}{00-ff}{01-10}00',
  ].join("\n"),
  'EUC-JP' => [
    '{00-7f}',
    '{a1-fe}{a1-fe}',
    '8e{a1-fe}',
    '8f{a1-fe}{a1-fe}',
  ].join("\n"),
  'CP51932' => [
    '{00-7f}',
    '{a1-fe}{a1-fe}',
    '8e{a1-fe}',
  ].join("\n"),
  'Shift_JIS' => [
    '{00-7f}',
    '{81-9f,e0-fc}{40-7e,80-fc}',
    '{a1-df}',
  ].join("\n"),
  'EUC-KR' => [
    '{00-7f}',
    '{a1-fe}{a1-fe}',
  ].join("\n"),
  'CP949' => [
    '{00-7f}',
    '{81-fe}{41-5a,61-7a,81-fe}',
  ].join("\n"),
  'Big5' => [
    '{00-7f}',
    '{81-fe}{40-7e,a1-fe}',
  ].join("\n"),
  'EUC-TW' => [
    '{00-7f}',
    '{a1-fe}{a1-fe}',
    '8e{a1-b0}{a1-fe}{a1-fe}',
  ].join("\n"),
  'GBK' => [
    '{00-80}',
    '{81-fe}{40-7e,80-fe}',
  ].join("\n"),
  'GB18030' => [
    '{00-7f}',
    '{81-fe}{40-7e,80-fe}',
    '{81-fe}{30-39}{81-fe}{30-39}',
  ].join("\n"),
}

# Encodings that share the byte structure of an entry above, as
# [alias, existing key] pairs.  fetch raises if the existing key is
# missing.
[
  ['ASCII-8BIT', '1byte'],
  ['ISO-8859-1', '1byte'],
  ['ISO-8859-2', '1byte'],
  ['ISO-8859-3', '1byte'],
  ['ISO-8859-4', '1byte'],
  ['ISO-8859-5', '1byte'],
  ['ISO-8859-6', '1byte'],
  ['ISO-8859-7', '1byte'],
  ['ISO-8859-8', '1byte'],
  ['ISO-8859-9', '1byte'],
  ['ISO-8859-10', '1byte'],
  ['ISO-8859-11', '1byte'],
  ['ISO-8859-13', '1byte'],
  ['ISO-8859-14', '1byte'],
  ['ISO-8859-15', '1byte'],
  ['Windows-31J', 'Shift_JIS'],
  ['eucJP-ms', 'EUC-JP'],
].each do |alias_name, base_name|
  ValidEncoding[alias_name] = ValidEncoding.fetch(base_name)
end
|
|
|
|
|
|
|
|
# Builds the signature text recorded for one input file: its quoted name,
# the length of its contents and String#sum of its contents.
def make_signature(filename, src)
  format("src=%s, len=%s, checksum=%s", filename.dump, src.length, src.sum)
end
|
|
|
|
|
|
|
|
# ---- command line driver ----
#
# Usage: <this script> [--verbose] [--force] [--output=FILE] TEMPLATE
#
# TEMPLATE is an ERB file evaluated with the top-level binding.  The result
# is written to FILE (or printed) behind a header of signatures of this
# script, the template and any libraries the template loaded.  When FILE
# already starts with an identical header, generation is skipped.

output_filename = nil
verbose_mode = false
force_mode = false

op = OptionParser.new
op.def_option("--help", "show help message") { puts op; exit 0 }
op.def_option("--verbose", "verbose mode") { verbose_mode = true }
op.def_option("--force", "force table generation") { force_mode = true }
op.def_option("--output=FILE", "specify output file") {|arg| output_filename = arg }
op.parse!

VERBOSE_MODE = verbose_mode

arg = ARGV.shift
# Without a template there is nothing to do; report it instead of failing
# later inside File.dirname(nil).
abort "no input file given\n#{op}" unless arg

$srcdir = File.dirname(arg)
$:.unshift $srcdir unless $:.include? $srcdir
src = File.read(arg)
src.force_encoding("ascii-8bit") if src.respond_to? :force_encoding
this_script = File.read(__FILE__)
this_script.force_encoding("ascii-8bit") if this_script.respond_to? :force_encoding

base_signature = "/* autogenerated. */\n"
base_signature << "/* #{make_signature(File.basename(__FILE__), this_script)} */\n"
base_signature << "/* #{make_signature(File.basename(arg), src)} */\n"

if !force_mode && output_filename && File.readable?(output_filename)
  # The header is the first paragraph of the existing output.  An empty
  # file has none (gets returns nil), which simply means "not up to date".
  old_signature = File.open(output_filename) {|f| f.gets("").to_s.chomp }
  chk_signature = base_signature.dup
  # Recompute the signature of every additional file named in the old
  # header so a change in any of them is detected.
  old_signature.each_line {|line|
    if %r{/\* src="([0-9a-z_.-]+)",} =~ line
      name = $1
      next if name == File.basename(arg) || name == File.basename(__FILE__)
      path = File.join($srcdir, name)
      if File.readable? path
        chk_signature << "/* #{make_signature(name, File.read(path))} */\n"
      end
    end
  }
  if old_signature == chk_signature
    # Nothing changed: just refresh the timestamps of the output file.
    now = Time.now
    File.utime(now, now, output_filename)
    STDERR.puts "already up-to-date: #{output_filename}" if VERBOSE_MODE
    exit
  end
end

if VERBOSE_MODE
  if output_filename
    STDERR.puts "generating #{output_filename} ..."
  end
end

# Snapshot of the loaded features, to find out what the template requires.
libs1 = $".dup
# NOTE(review): this is the positional (safe_level, trim_mode) form of
# ERB.new; newer ERB releases expect trim_mode: '%' as a keyword. Confirm
# against the Ruby version used to run the generator.
erb = ERB.new(src, nil, '%')
erb.filename = arg
erb_result = erb.result(binding)
libs2 = $".dup

libs = libs2 - libs1
lib_sigs = ''
libs.each {|lib|
  lib = File.basename(lib)
  path = File.join($srcdir, lib)
  # Only libraries that live next to the template are recorded.
  if File.readable? path
    lib_sigs << "/* #{make_signature(lib, File.read(path))} */\n"
  end
}

result = ''
result << base_signature
result << lib_sigs
result << "\n"
result << erb_result
result << "\n"

if output_filename
  # Write to a temporary name and rename, so the output file is replaced
  # in a single step.
  new_filename = output_filename + ".new"
  FileUtils.mkdir_p(File.dirname(output_filename))
  File.open(new_filename, "wb") {|f| f << result }
  File.rename(new_filename, output_filename)
  STDERR.puts "done." if VERBOSE_MODE
else
  print result
end
|