bug 210502 : update category table to Unicode 4.1.0 (r=smontagu, sr=dbaron)

This commit is contained in:
jshin%mailaps.org 2005-04-06 08:31:26 +00:00
Родитель cbbe1c749a
Коммит a7b2338136
2 изменённых файлов: 2838 добавлений и 1438 удалений

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -21,6 +21,7 @@
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Jungshik Shin <jshin@i18nl10n.com>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
@ -78,8 +79,8 @@ open ( OUT , "> ../src/cattable.h")
# Generate license and header
#
######################################################################
$npl = <<END_OF_NPL;
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
$mpl = <<END_OF_MPL;
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -119,8 +120,9 @@ $npl = <<END_OF_NPL;
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/unicharutil/tools/gencattable.pl
*/
END_OF_NPL
print OUT $npl;
END_OF_MPL
print OUT $mpl;
print OUT "#include \"nscore.h\" \n\n";
@ -130,6 +132,9 @@ print OUT "#include \"nscore.h\" \n\n";
%sl = ();
%sc = ();
$prevcjkcomp = 0;
$cjkcompidx = 0;
######################################################################
#
# Process the file line by line
@ -148,7 +153,28 @@ while(<UNICODATA>) {
$g = $f[2]; # The General Category
$cat = substr($g, 0, 1);
if(( substr($n, 0, 1) ne "<") || ($n eq "<control>"))
# All CJK Compatibility ideographs belong to Lo
# Runs of consecutive code points are recorded as "CJK Compatibility #N"
# entries: %sl gets the first code point of a run, %sh the last one, and
# %sc the category number. Each new key is also appended to @special.
if ($n =~ /^CJK COMPATIBILITY IDEOGRAPH/)
{
$catnum = $map{$cat};
if ($cat ne "L") {
print "WARNING !!!! " . "
error in handling CJK Compatibility Ideograph block\n\n";
}
# A gap between the previous ideograph and this one starts a new run.
if (hex($prevcjkcomp) + 1 != hex($c))
{
# Close the previous run first; $cjkcompkey still names it here.
# Skipped while $prevcjkcomp holds its initial value of 0.
if (hex($prevcjkcomp) != 0)
{
$sh{$cjkcompkey} = $prevcjkcomp;
}
$cjkcompkey = sprintf("CJK Compatibility #%d", ++$cjkcompidx);
$sl{$cjkcompkey} = $c;
$sc{$cjkcompkey} = $catnum;
push @special, $cjkcompkey;
}
# Remember this code point so the next line can test for contiguity.
$prevcjkcomp = $c;
}
elsif(( substr($n, 0, 1) ne "<") || ($n eq "<control>"))
{
#
# print $g;
@ -163,11 +189,13 @@ while(<UNICODATA>) {
# Handle special block
@pair=split(/, /, $n );
$catnum = $map{$cat};
$pair[0] =~ s/^<//;
# printf "[%s][%s] => %d\n", $pair[0], $pair[1], $catnum;
if( $pair[1] eq "First>") {
$sl{$pair[0]} = $c;
$sc{$pair[0]} = $catnum;
push @special, $pair[0];
} elsif ( $pair[1] eq "Last>") {
$sh{$pair[0]} = $c;
if($sc{$pair[0]} ne $catnum)
@ -180,6 +208,9 @@ while(<UNICODATA>) {
}
}
# take care of the last CJK Compatibility block
$sh{$cjkcompkey} = $prevcjkcomp;
# @cats = keys(%gcount);
# foreach $cat ( sort(@cats) ) {
# $count = $gcount{$cat};
@ -188,19 +219,27 @@ while(<UNICODATA>) {
@range = (
0x0000, 0x06ff,
0x0900, 0x11ff,
0x1e00, 0x27ff,
0x3000, 0x33ff,
0xf900, 0xffff
0x0000, 0x07ff,
0x0900, 0x1a1f,
0x1d00, 0x33ff,
0x4dc0, 0x4dff,
0xa000, 0xa4ff,
0xa700, 0xa82f,
0xfb00, 0xffff,
0x10000, 0x104af,
0x10800, 0x1083f,
0x10a00, 0x10a5f,
0x1d000, 0x1d7ff
);
$totaldata = 0;
$tt=($#range+1) / 2;
$newidx = 0;
@newidx = (0);
@patarray = ();
$oldplane = -1;
@planes = ();
for($t = 1; $t <= $tt; $t++)
{
@ -208,6 +247,20 @@ for($t = 1; $t <= $tt; $t++)
$th = $range[($t-1) * 2 + 1];
$ts = ( $th - $tl ) >> 3;
$totaldata += $ts + 1;
$plane = $tl >> 16;
# When the range being processed starts in a different plane than the
# previous one, report the finished plane's pattern count, verify that it
# still fits, and start a fresh counter for the new plane.
# Index tables are emitted as PRUint8, so a plane may hold at most 256
# patterns (indices 0..255).
if ($oldplane != $plane) {
  if ($oldplane != -1) {
    printf STDERR "Plane %d has %d patterns\n", $oldplane, $newidx[$oldplane];
    # Check the plane that was just completed. The new plane's counter is
    # only reset below, so testing $newidx[$plane] here could never fail.
    if ($newidx[$oldplane] > 256) {
      printf STDERR "We have more than 256 patterns for plane %d\n", $oldplane;
      die "This program is now broken!!!\n\n\n";
    }
  }
  $newidx[$plane] = 0;
  push @planes, $plane;
}
$oldplane = $plane;
printf OUT "static const PRUint8 gGenCatIdx%d[%d] = {\n", $t, $ts + 1;
for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ )
{
@ -223,15 +276,16 @@ for($t = 1; $t <= $tt; $t++)
}
}
$pattern = sprintf("0x%08X", $data);
$idx = $pat{$pattern};
unless( exists($pat{$pattern})){
$idx = $newidx++;
$patarray[$idx] = $pattern;
$pat{$pattern} = $idx;
$idx = $pat[$plane]{$pattern};
unless( exists($pat[$plane]{$pattern})){
$idx = $newidx[$plane]++;
$patarray[$plane][$idx] = $pattern;
$pat[$plane]{$pattern} = $idx;
}
printf OUT " %3d, // U+%04X - U+%04X : %s\n" ,
printf OUT " %3d, // U+%06X - U+%06X : %s\n" ,
$idx, ($i << 3),((($i +1)<< 3)-1), $pattern ;
@ -258,30 +312,31 @@ for($t = 1; $t <= $tt; $t++)
$pattern = sprintf("0x%08X", $data);
if($data ne 0)
{
print "WARNING, Unicode Database now contain characters" .
"which we have not consider, change this program !!!\n\n";
printf "Problem- U+%04X - U+%04X range\n", ($i << 3),((($i +1)<< 3)-1);
print "WARNING, Unicode Database now contain characters " .
"which we have not considered. change this program !!!\n\n";
printf "Problem- U+%06X - U+%06X range\n", ($i << 3),((($i +1)<< 3)-1);
}
}
}
}
if($newidx > 255)
{
die "We have more than 255 patterns !!! - $newidx\n\n" .
"This program is now broken!!!\n\n\n";
printf STDERR "Plane %d has %d patterns\n", $plane, $newidx[$plane];
if ($newidx[$plane] > 256) {
printf STDERR "We have more than 256 patterns for plane %d\n", $plane;
die "This program is now broken!!!\n\n\n";
}
printf OUT "static const PRUint32 gGenCatPat[$newidx] = {\n";
for($i = 0 ; $i < $newidx; $i++)
{
printf OUT " %s, // $i \n", $patarray[$i] ;
}
printf OUT "};\n\n";
$totaldata += $newidx * 4;
printf OUT "static PRUint8 GetCat(PRUnichar u)\n{\n";
# Emit one pattern table per plane. Each entry is a 32-bit pattern that
# GetCat() decodes as eight 4-bit category values; the per-range index
# tables select entries from the table of their own plane.
for $plane (@planes) {
  # Size the array from this plane's own pattern count. The scalar $newidx
  # is no longer maintained now that counts live in @newidx.
  printf OUT "static const PRUint32 gGenCatPatPl%d[%d] = {\n",
             $plane, $newidx[$plane];
  for($i = 0 ; $i < $newidx[$plane]; $i++)
  {
    printf OUT " %s, // %d \n", $patarray[$plane][$i], $i;
  }
  printf OUT "}; \n\n";
  # Four bytes per PRUint32 pattern.
  $totaldata += $newidx[$plane] * 4;
}
printf OUT "static PRUint8 GetCat(PRUint32 u)\n{\n";
printf OUT " PRUint32 pat;\n";
printf OUT " //\n";
printf OUT " // Handle block which use index table mapping \n";
@ -290,9 +345,12 @@ for($t = 1; $t <= $tt; $t++)
{
$tl = $range[($t-1) * 2];
$th = $range[($t-1) * 2 + 1];
printf OUT " // Handle U+%04X to U+%04X\n", $tl, $th;
printf OUT " if((((PRUnichar)0x%04X)<=u)&&(u<=((PRUnichar)0x%04X))) {\n", $tl, $th;
printf OUT " pat = gGenCatPat[gGenCatIdx%d [( u -(PRUnichar) 0x%04X ) / 8]];\n", $t, $tl;
$plane = $tl >> 16;
printf OUT " // Handle U+%06X to U+%06X\n", $tl, $th;
printf OUT " if(0x%06X <= u && u <= 0x%06X) {\n", $tl, $th;
printf OUT " pat = " .
"gGenCatPatPl%d[gGenCatIdx%d [( u - 0x%06X ) / 8]];\n",
$plane, $t, $tl;
printf OUT " return (pat >> ((u % 8) * 4)) & 0x0F;\n";
printf OUT " }\n\n";
}
@ -302,10 +360,10 @@ printf OUT " // Handle blocks which share the same category \n";
printf OUT " //\n";
@special = keys(%sh);
foreach $s ( sort(@special) ) {
printf OUT " // Handle %s block \n", substr($s, 1,-1);
printf OUT " if((((PRUnichar)0x%s)<=u)&&(u<=((PRUnichar)0x%s))) \n", $sl{$s}, $sh{$s};
#@special = keys(%sh);
foreach $s ( @special ) {
printf OUT " // Handle %s block \n", $s;
printf OUT " if(0x%s <= u && u <= 0x%s) \n", $sl{$s}, $sh{$s};
printf OUT " return $sc{$s}; \n\n";
}