From 83703bf5c0b46a956206e725eba3dd8e7a23ab99 Mon Sep 17 00:00:00 2001 From: "ftang%netscape.com" Date: Tue, 9 Feb 1999 23:51:43 +0000 Subject: [PATCH] first check in --- intl/unicharutil/tools/gencattable.pl | 294 ++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 intl/unicharutil/tools/gencattable.pl diff --git a/intl/unicharutil/tools/gencattable.pl b/intl/unicharutil/tools/gencattable.pl new file mode 100644 index 00000000000..ac74fe0bd57 --- /dev/null +++ b/intl/unicharutil/tools/gencattable.pl @@ -0,0 +1,294 @@ +#!/usr/bin/perl +# +# The contents of this file are subject to the Netscape Public License +# Version 1.0 (the "NPL"); you may not use this file except in +# compliance with the NPL. You may obtain a copy of the NPL at +# http://www.mozilla.org/NPL/ +# +# Software distributed under the NPL is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL +# for the specific language governing rights and limitations under the +# NPL. +# +# The Initial Developer of this code under the NPL is Netscape +# Communications Corporation. Portions created by Netscape are +# Copyright (C) 1999 Netscape Communications Corporation. All Rights +# Reserved. +# + +###################################################################### +# +# Initial global variable +# +###################################################################### + +%gcount = (); +%pat = (); + +%map = ( + "M" => "1", + "N" => "2", + "Z" => "3", + "C" => "4", + "L" => "5", + "P" => "6", + "S" => "7" +); + +%special = (); + +###################################################################### +# +# Open the unicode database file +# +###################################################################### +open ( UNICODATA , "< UnicodeData-Latest.txt") + || die "cannot find UnicodeData-Latest.txt"; + +###################################################################### +# +# Open the output file +# +###################################################################### +open ( OUT , "> ../src/cattable.h") + || die "cannot open output ../src/cattable.h file"; + +###################################################################### +# +# Generate license and header +# +###################################################################### +$npl = <) { + chop; + ###################################################################### + # + # Get value from fields + # + ###################################################################### + @f = split(/;/ , $_); + $c = $f[0]; # The unicode value + $n = $f[1]; # The unicode name + $g = $f[2]; # The General Category + + $cat = substr($g, 0, 1); + if(( substr($n, 0, 1) ne "<") || ($n eq "")) + { + # + # print $g; + # + + $gcount{$g}++; + $gcount{$cat}++; + $category{$c} = $cat; + # print $g . " = " . $gcount{$g} . "\n"; + } else { + + # Handle special block + @pair=split(/, /, $n ); + $catnum = $map{$cat}; + + # printf "[%s][%s] => %d\n", $pair[0], $pair[1], $catnum; + if( $pair[1] eq "First>") { + $sl{$pair[0]} = $c; + $sc{$pair[0]} = $catnum; + } elsif ( $pair[1] eq "Last>") { + $sh{$pair[0]} = $c; + if($sc{$pair[0]} ne $catnum) + { + print "WARNING !!!! error in handling special block\n\n"; + } + } else { + print "WARNING !!!! error in handling special block\n\n"; + } + } +} + +# @cats = keys(%gcount); +# foreach $cat ( sort(@cats) ) { +# $count = $gcount{$cat}; +# print "$cat ==> $count\n"; +# } + + +@range = ( + 0x0000, 0x06ff, + 0x0900, 0x11ff, + 0x1e00, 0x27ff, + 0x3000, 0x33ff, + 0xf900, 0xffff +); + + +$totaldata = 0; + +$tt=($#range+1) / 2; +$newidx = 0; +@patarray = (); + +for($t = 1; $t <= $tt; $t++) +{ + $tl = $range[($t-1) * 2]; + $th = $range[($t-1) * 2 + 1]; + $ts = ( $th - $tl ) >> 3; + $totaldata += $ts + 1; + printf OUT "static PRUint8 gGenCatIdx%d[%d] = {\n", $t, $ts + 1; + for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ ) + { + $data = 0; + for($j = 0; $j < 8 ; $j++) + { + $k = sprintf("%04X", (($i << 3) + $j)); + + $cat = $category{$k}; + if( $cat ne "") + { + $data = $data + ($map{$cat} << (4*$j)); + } + } + $pattern = sprintf("0x%08X", $data); + + $idx = $pat{$pattern}; + unless( exists($pat{$pattern})){ + $idx = $newidx++; + $patarray[$idx] = $pattern; + $pat{$pattern} = $idx; + } + + printf OUT " %3d, // U+%04X - U+%04X : %s\n" , + $idx, ($i << 3),((($i +1)<< 3)-1), $pattern ; + + + } + printf OUT "};\n\n"; + + if($t ne $tt) + { + $tl = $range[($t-1) * 2 + 1] + 1; + $th = $range[$t * 2] - 1; + for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ ) + { + $data = 0; + for($j = 0; $j < 8 ; $j++) + { + $k = sprintf("%04X", (($i << 3) + $j)); + + $cat = $category{$k}; + if( $cat ne "") + { + $data = $data + ($map{$cat} << (4*$j)); + } + } + $pattern = sprintf("0x%08X", $data); + if($data ne 0) + { + print "WARNING, Unicode Database now contain characters" . + "which we have not consider, change this program !!!\n\n"; + printf "Problem- U+%04X - U+%04X range\n", ($i << 3),((($i +1)<< 3)-1); + } + } + } +} + + +if($newidx > 255) +{ + die "We have more than 255 patterns !!! - $newidx\n\n" . + "This program is now broken!!!\n\n\n"; + +} +printf OUT "static PRUint32 gGenCatPat[$newidx] = {\n"; +for($i = 0 ; $i < $newidx; $i++) +{ + printf OUT " %s, // $i \n", $patarray[$i] ; +} +printf OUT "};\n\n"; +$totaldata += $newidx * 4; + +printf OUT "static PRUint8 GetCat(PRUnichar u)\n{\n"; +printf OUT " PRUint32 pat;\n"; +printf OUT " //\n"; +printf OUT " // Handle block which use index table mapping \n"; +printf OUT " //\n"; +for($t = 1; $t <= $tt; $t++) +{ + $tl = $range[($t-1) * 2]; + $th = $range[($t-1) * 2 + 1]; + printf OUT " // Handle U+%04X to U+%04X\n", $tl, $th; + printf OUT " if((((PRUnichar)0x%04X)<=u)&&(u<=((PRUnichar)0x%04X))) {\n", $tl, $th; + printf OUT " pat = gGenCatIdx%d [( u -(PRUnichar) 0x%04X )];\n", $t, $tl; + printf OUT " return (pat >> ((u % 8) * 4)) & 0x0F;\n"; + printf OUT " }\n\n"; +} + +printf OUT " //\n"; +printf OUT " // Handle blocks which share the same category \n"; +printf OUT " //\n"; + + +@special = keys(%sh); +foreach $s ( sort(@special) ) { + printf OUT " // Handle %s block \n", substr($s, 1,-1); + printf OUT " if((((PRUnichar)0x%s)<=u)&&(u<=((PRUnichar)0x%s))) \n", $sl{$s}, $sh{$s}; + printf OUT " return $sc{$s}; \n\n"; +} + + + +printf OUT " return 0; // UNDEFINE \n};\n"; + +printf OUT "// total data size = $totaldata\n"; +#$total = 0; +#@pats = keys(%pat); +#foreach $pattern ( sort(@pats) ) { +# $count = $pat{$pattern}; +# # print "$cat ==> $count\n"; +# $total++; +#} +print "total = $totaldata\n"; + +###################################################################### +# +# Close files +# +###################################################################### +close(UNIDATA); +close(OUT); +