#!/usr/bin/perl
#
# The contents of this file are subject to the Netscape Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/NPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is Netscape
# Communications Corporation. Portions created by Netscape are
# Copyright (C) 1999 Netscape Communications Corporation. All
# Rights Reserved.
#
# Contributor(s):
#

######################################################################
#
# Purpose
#
# Reads three inputs:
#   * UnicodeData-Latest.txt  (field 0 = code point, field 2 = category)
#   * jisx4501class.txt       (first;last;class ranges)
#   * jisx4501simp.txt        (class;simplified-class mapping)
# and writes two outputs:
#   * anzx4501.html           (cross-tabulation report)
#   * ../src/jisx4501class.h  (packed per-code-point class tables)
#
# NOTE(review): the copy of this script that was reviewed had every
# "<...>" span stripped (heredoc openers, <FILEHANDLE> reads and all
# HTML markup).  Those pieces were reconstructed from the surrounding
# code; the exact HTML tags/attributes and the exact text written at
# the top of the generated header should be confirmed against the
# upstream revision.
#
######################################################################

use strict;
use warnings;

######################################################################
#
# Script-wide state
#
######################################################################
my %gcat       = ();   # UnicodeData field 0 -> full category (field 2)
my %dcat       = ();   # UnicodeData field 0 -> first letter of the category
my %gcount     = ();   # full category         -> number of UnicodeData lines
my %dcount     = ();   # first category letter -> number of UnicodeData lines
my %simp       = ();   # class name -> simplified class
my %sccount    = ();   # simplified class -> number of mapping lines
my %occ        = ();   # code point (number) -> "simplified | class"
my %gtotal     = ();   # simplified class . full category   -> code points
my %dtotal     = ();   # simplified class . category letter -> code points
my %rangecount = ();   # " HH:simplified" (HH = high byte)  -> code points

# Categories assumed for code points that have no entry of their own in
# %gcat / %dcat (presumably blocks that UnicodeData does not list
# individually -- TODO confirm).  Each row is:
#   first, last, full category, one-letter category.
# "Han" is a label private to this script; it is used for both columns.
my @range_category = (
    [ 0x3400, 0x9fa5, "Han", "Han" ],
    [ 0xac00, 0xd7a3, "Lo",  "L"   ],
    [ 0xd800, 0xdfff, "Cs",  "C"   ],   # was three adjacent "Cs" ranges
    [ 0xe000, 0xf8ff, "Co",  "C"   ],
);

######################################################################
#
# Open the unicode database file
#
######################################################################
open(UNICODATA, "<", "../../unicharutil/tools/UnicodeData-Latest.txt")
    or die "cannot find UnicodeData-Latest.txt: $!";

######################################################################
#
# Open the JIS x4501 Class file
#
######################################################################
open(CLASS, "<", "jisx4501class.txt")
    or die "cannot find jisx4501class.txt: $!";

######################################################################
#
# Open the JIS x4501 Class simplified mapping
#
######################################################################
open(SIMP, "<", "jisx4501simp.txt")
    or die "cannot find jisx4501simp.txt: $!";

######################################################################
#
# Open the HTML report output file
#
######################################################################
open(OUT, ">", "anzx4501.html")
    or die "cannot open output anzx4501.html file: $!";

######################################################################
#
# Open the generated C header output file
#
######################################################################
open(HEADER, ">", "../src/jisx4501class.h")
    or die "cannot open output ../src/jisx4501class.h file: $!";

######################################################################
#
# Generate the HTML prologue
#
# NOTE(review): only the title text survived in the reviewed copy; the
# surrounding tags are reconstructed.
#
######################################################################
my $hthmlheader = <<'END_OF_HTML';
<HTML>
<HEAD>
<TITLE>
Analysis of JIS x4501 to Unicode General Category Mapping
</TITLE>
</HEAD>
<BODY>
<H1>
Analysis of JIS x4501 to Unicode General Category Mapping
</H1>
END_OF_HTML
print OUT $hthmlheader;

######################################################################
#
# Generate license and header for the C output
#
# NOTE(review): the body of this heredoc was lost in the reviewed copy.
# The license text below mirrors the one at the top of this script;
# confirm the wording against the upstream revision.
#
######################################################################
my $npl = <<'END_OF_NPL';
/*
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1999 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 */
/*
 * DO NOT EDIT THIS FILE.  It is generated by a Perl script from
 * jisx4501class.txt and jisx4501simp.txt.
 */
END_OF_NPL
print HEADER $npl;

######################################################################
#
# Read the unicode database: remember each code point's category and
# count how many entries fall in each category.
#
######################################################################
while (my $line = <UNICODATA>) {
    # chomp, not chop: a final line without a newline must not lose its
    # last character.
    chomp $line;

    my @f = split(/;/, $line);

    # Skip blank/short lines instead of recording an empty category.
    next if @f < 3 || $f[2] eq "";

    my $c = $f[0];               # The unicode value
    my $g = $f[2];               # Full category
    my $d = substr($g, 0, 1);    # First letter of the category

    $gcat{$c} = $g;
    $dcat{$c} = $d;
    $gcount{$g}++;
    $dcount{$d}++;
}
# The handle opened above is UNICODATA (the old code closed "UNIDATA",
# which was never opened).
close(UNICODATA);

######################################################################
#
# Read the class -> simplified class mapping
#
######################################################################
while (my $line = <SIMP>) {
    chomp $line;

    my @f = split(/;/, $line);
    next if @f < 2;              # blank or malformed line

    $simp{ $f[0] } = $f[1];
    $sccount{ $f[1] }++;
}
close(SIMP);

######################################################################
#
# Shared lookup used by GetClass and GetDClass.
#
#   $u      code point (number)
#   $table  \%gcat or \%dcat
#   $column index into a @range_category row (2 = full, 3 = one-letter)
#   $label  text used in the "cannot find" message
#
# Returns the category, or "" (after printing a message to STDOUT) when
# neither the table nor the fallback ranges cover the code point.
#
######################################################################
sub _lookup_category {
    my ($u, $table, $column, $label) = @_;

    my $hex = DecToHex($u);
    my $cat = $table->{$hex};
    return $cat if defined $cat && $cat ne "";

    foreach my $range (@range_category) {
        return $range->[$column]
            if $range->[0] <= $u && $u <= $range->[1];
    }

    printf "WARNNING !!!! Cannot find %s for U+%s \n", $label, $hex;

    # Return an explicit empty string; previously the result of printf
    # leaked out and ended up inside the %gtotal / %dtotal keys.
    return "";
}

# Full category (e.g. "Lu") of code point $u, or "" if unknown.
sub GetClass {
    my ($u) = @_;
    return _lookup_category($u, \%gcat, 2, "General Category");
}

# One-letter category (e.g. "L") of code point $u, or "" if unknown.
sub GetDClass {
    my ($u) = @_;
    return _lookup_category($u, \%dcat, 3, "Detailed General Category");
}

# Format a number as upper-case hex, zero-padded to at least 4 digits.
sub DecToHex {
    my ($d) = @_;
    return sprintf("%04X", $d);
}

######################################################################
#
# Read the class file.  Each line is first;last;class where first/last
# are hex code points and last may be empty for a single code point.
# Classes whose name starts with "a" are ignored (reason not visible
# here -- TODO confirm against the data file).
#
######################################################################
while (my $line = <CLASS>) {
    chomp $line;

    my @f = split(/;/, $line);

    # A blank/short line used to be parsed as the range 0..0 and claim
    # U+0000; skip it instead.
    next if @f < 3;

    my ($first, $last, $class) = @f[ 0, 1, 2 ];
    next if substr($class, 0, 1) eq "a";

    my $sc = $simp{$class};
    if (!defined $sc) {
        # printarray() emits the second character of "$sc | $class";
        # without a mapping that character is "|", which is not a hex
        # digit, so make the problem visible.
        warn "no simplified class for [$class]\n";
        $sc = "";
    }

    my $lo = hex($first);
    my $hi = ($last eq "") ? $lo : hex($last);

    foreach my $k ($lo .. $hi) {
        # The first definition of a code point wins; later ones are
        # silently ignored.
        #  printf "WARNING !! Conflict definition!!! U+%s -> [%s] [%s | %s]\n",
        #         DecToHex($k), $occ{$k} , $class , $sc;
        next if exists $occ{$k};

        $occ{$k} = $sc . " | " . $class;

        $gtotal{ $sc . GetClass($k) }++;
        $dtotal{ $sc . GetDClass($k) }++;

        # Bucket by the first two hex digits of the code point.
        $rangecount{ " " . substr(DecToHex($k), 0, 2) . ":" . $sc }++;
    }
}

# Render a possibly-missing count as table cell text (missing -> empty).
sub _cell {
    my ($value) = @_;
    return defined $value ? $value : "";
}

######################################################################
#
# Write the two report tables to OUT:
#   1. one row per simplified class; columns are the one-letter
#      categories, a total of those, then the full categories
#   2. one row per high byte 00..4E that has any entry; columns are
#      the simplified classes
#
# NOTE(review): the HTML markup is reconstructed (see top of file).
#
######################################################################
sub printreport {
    my @dcats    = sort(keys %dcount);
    my @gcats    = sort(keys %gcount);
    my @sclasses = sort(keys %sccount);

    print OUT "<TABLE BORDER=1>\n";
    print OUT "<TR><TH></TH>\n";
    foreach my $d (@dcats) {
        print OUT "<TH>$d</TH>\n";
    }
    print OUT "<TH>Total</TH>\n";
    foreach my $g (@gcats) {
        print OUT "<TH>$g</TH>\n";
    }
    print OUT "</TR>\n";

    foreach my $sc (@sclasses) {
        print OUT "<TR><TH>$sc</TH>\n";

        my $total = 0;
        foreach my $d (@dcats) {
            my $count = $dtotal{ $sc . $d };
            $total += $count if defined $count;
            print OUT "<TD>" . _cell($count) . "</TD>\n";
        }
        print OUT "<TD>$total</TD>\n";

        foreach my $g (@gcats) {
            print OUT "<TD>" . _cell($gtotal{ $sc . $g }) . "</TD>\n";
        }
        print OUT "</TR>\n";
    }
    print OUT "</TABLE>\n";

    print OUT "<TABLE BORDER=1>\n";
    print OUT "<TR><TH></TH>\n";
    foreach my $sc (@sclasses) {
        print OUT "<TH>$sc</TH>\n";
    }
    print OUT "</TR>\n";

    for (my $rr = 0; $rr < 0x4f; $rr++) {
        my $r         = sprintf("%02X", $rr);
        my $row       = "<TR><TH>" . $r . "</TH>\n";
        my $row_total = 0;

        foreach my $sc (@sclasses) {
            my $count = $rangecount{ " " . $r . ":" . $sc };
            $row .= sprintf("<TD>%s</TD>\n", _cell($count));
            $row_total += $count if defined $count;
        }
        $row .= "</TR>\n";

        # Rows with no entries at all are left out (numeric test; the
        # old code compared the sum as a string).
        print OUT $row if $row_total != 0;
    }
    print OUT "</TABLE>\n";
}

printreport();
print OUT "</BODY>\n</HTML>\n";

######################################################################
#
# Write one table of 32 words to HEADER for the 256 code points
# U+RR00..U+RRFF.
#
#   $r    high byte as two hex digits, e.g. "30"
#   $def  hex digit used for code points that have no entry in %occ
#
# Each word holds 8 code points, one hex digit each, highest code point
# first.  The digit is the SECOND character of the stored
# "simplified | class" string (assumes simplified classes are two
# characters whose second one is a hex digit -- TODO confirm against
# jisx4501simp.txt).  Also prints a progress line to STDOUT.
#
######################################################################
sub printarray {
    my ($r, $def) = @_;

    printf "[%s || %s]\n", $r, $def;

    my $base = hex($r) * 256;
    printf HEADER "static PRUint32 gLBClass%s[32] = {\n", $r;

    for (my $i = 0; $i < 256; $i += 8) {
        my $digits = "";
        for (my $j = 7; $j >= 0; $j--) {
            my $v = $base + $i + $j;
            $digits .= exists $occ{$v} ? substr($occ{$v}, 1, 1) : $def;
        }
        printf HEADER "0x%s, // U+%04X - U+%04X\n",
            $digits, $base + $i, $base + $i + 7;
    }
    print HEADER "};\n\n";
}

printarray("00", "8");
printarray("20", "8");
printarray("21", "8");
printarray("30", "5");

#print %rangecount;

######################################################################
#
# Close files (write handles are checked: buffered write errors only
# surface at close)
#
######################################################################
close(HEADER) or die "cannot close ../src/jisx4501class.h: $!";
close(CLASS);
close(OUT) or die "cannot close anzx4501.html: $!";