зеркало из https://github.com/mozilla/pjs.git
bug 210502 : update category table to Unicode 4.1.0 (r=smontagu, sr=dbaron)
This commit is contained in:
Родитель
cbbe1c749a
Коммит
a7b2338136
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -21,6 +21,7 @@
|
|||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jungshik Shin <jshin@i18nl10n.com>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
|
@ -78,8 +79,8 @@ open ( OUT , "> ../src/cattable.h")
|
|||
# Generate license and header
|
||||
#
|
||||
######################################################################
|
||||
$npl = <<END_OF_NPL;
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
$mpl = <<END_OF_MPL;
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
|
@ -119,8 +120,9 @@ $npl = <<END_OF_NPL;
|
|||
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
|
||||
mozilla/intl/unicharutil/tools/gencattable.pl
|
||||
*/
|
||||
END_OF_NPL
|
||||
print OUT $npl;
|
||||
END_OF_MPL
|
||||
|
||||
print OUT $mpl;
|
||||
|
||||
print OUT "#include \"nscore.h\" \n\n";
|
||||
|
||||
|
@ -130,6 +132,9 @@ print OUT "#include \"nscore.h\" \n\n";
|
|||
%sl = ();
|
||||
%sc = ();
|
||||
|
||||
$prevcjkcomp = 0;
|
||||
$cjkcompidx = 0;
|
||||
|
||||
######################################################################
|
||||
#
|
||||
# Process the file line by line
|
||||
|
@ -148,7 +153,28 @@ while(<UNICODATA>) {
|
|||
$g = $f[2]; # The General Category
|
||||
|
||||
$cat = substr($g, 0, 1);
|
||||
if(( substr($n, 0, 1) ne "<") || ($n eq "<control>"))
|
||||
# All CJK Compatibility ideographs belong to Lo
|
||||
if ($n =~ /^CJK COMPATIBILITY IDEOGRAPH/)
|
||||
{
|
||||
$catnum = $map{$cat};
|
||||
if ($cat ne "L") {
|
||||
print "WARNING !!!! " . "
|
||||
error in handling CJK Compatibility Ideograph block\n\n";
|
||||
}
|
||||
if (hex($prevcjkcomp) + 1 != hex($c))
|
||||
{
|
||||
if (hex($prevcjkcomp) != 0)
|
||||
{
|
||||
$sh{$cjkcompkey} = $prevcjkcomp;
|
||||
}
|
||||
$cjkcompkey = sprintf("CJK Compatibility #%d", ++$cjkcompidx);
|
||||
$sl{$cjkcompkey} = $c;
|
||||
$sc{$cjkcompkey} = $catnum;
|
||||
push @special, $cjkcompkey;
|
||||
}
|
||||
$prevcjkcomp = $c;
|
||||
}
|
||||
elsif(( substr($n, 0, 1) ne "<") || ($n eq "<control>"))
|
||||
{
|
||||
#
|
||||
# print $g;
|
||||
|
@ -163,11 +189,13 @@ while(<UNICODATA>) {
|
|||
# Handle special block
|
||||
@pair=split(/, /, $n );
|
||||
$catnum = $map{$cat};
|
||||
$pair[0] =~ s/^<//;
|
||||
|
||||
# printf "[%s][%s] => %d\n", $pair[0], $pair[1], $catnum;
|
||||
if( $pair[1] eq "First>") {
|
||||
$sl{$pair[0]} = $c;
|
||||
$sc{$pair[0]} = $catnum;
|
||||
push @special, $pair[0];
|
||||
} elsif ( $pair[1] eq "Last>") {
|
||||
$sh{$pair[0]} = $c;
|
||||
if($sc{$pair[0]} ne $catnum)
|
||||
|
@ -180,6 +208,9 @@ while(<UNICODATA>) {
|
|||
}
|
||||
}
|
||||
|
||||
# take care of the last CJK Compatibility block
|
||||
$sh{$cjkcompkey} = $prevcjkcomp;
|
||||
|
||||
# @cats = keys(%gcount);
|
||||
# foreach $cat ( sort(@cats) ) {
|
||||
# $count = $gcount{$cat};
|
||||
|
@ -188,19 +219,27 @@ while(<UNICODATA>) {
|
|||
|
||||
|
||||
@range = (
|
||||
0x0000, 0x06ff,
|
||||
0x0900, 0x11ff,
|
||||
0x1e00, 0x27ff,
|
||||
0x3000, 0x33ff,
|
||||
0xf900, 0xffff
|
||||
0x0000, 0x07ff,
|
||||
0x0900, 0x1a1f,
|
||||
0x1d00, 0x33ff,
|
||||
0x4dc0, 0x4dff,
|
||||
0xa000, 0xa4ff,
|
||||
0xa700, 0xa82f,
|
||||
0xfb00, 0xffff,
|
||||
0x10000, 0x104af,
|
||||
0x10800, 0x1083f,
|
||||
0x10a00, 0x10a5f,
|
||||
0x1d000, 0x1d7ff
|
||||
);
|
||||
|
||||
|
||||
$totaldata = 0;
|
||||
|
||||
$tt=($#range+1) / 2;
|
||||
$newidx = 0;
|
||||
@newidx = (0);
|
||||
@patarray = ();
|
||||
$oldplane = -1;
|
||||
@planes = ();
|
||||
|
||||
for($t = 1; $t <= $tt; $t++)
|
||||
{
|
||||
|
@ -208,6 +247,20 @@ for($t = 1; $t <= $tt; $t++)
|
|||
$th = $range[($t-1) * 2 + 1];
|
||||
$ts = ( $th - $tl ) >> 3;
|
||||
$totaldata += $ts + 1;
|
||||
$plane = $tl >> 16;
|
||||
# On entering a new plane: report and validate the pattern count of the plane
# that has just been completed, then start a fresh counter for the new plane.
if ($oldplane != $plane) {
  if ($oldplane != -1) {
    printf STDERR "Plane %d has %d patterns\n", $oldplane, $newidx[$oldplane];
    # The index tables are emitted as PRUint8, so one plane can address at
    # most 256 distinct patterns (indices 0..255).  The plane to validate is
    # the finished one ($oldplane); the counter for $plane is only assigned
    # just below, so testing it here would never trigger.
    if ($newidx[$oldplane] > 256) {
      printf STDERR "We have more than 256 patterns for plane %d\n", $oldplane;
      die "This program is now broken!!!\n\n\n";
    }
  }
  # Begin counting patterns for the new plane and remember the plane so the
  # per-plane pattern tables can be emitted later.
  $newidx[$plane] = 0;
  push @planes, $plane;
}
|
||||
$oldplane = $plane;
|
||||
|
||||
printf OUT "static const PRUint8 gGenCatIdx%d[%d] = {\n", $t, $ts + 1;
|
||||
for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ )
|
||||
{
|
||||
|
@ -223,15 +276,16 @@ for($t = 1; $t <= $tt; $t++)
|
|||
}
|
||||
}
|
||||
$pattern = sprintf("0x%08X", $data);
|
||||
|
||||
|
||||
$idx = $pat{$pattern};
|
||||
unless( exists($pat{$pattern})){
|
||||
$idx = $newidx++;
|
||||
$patarray[$idx] = $pattern;
|
||||
$pat{$pattern} = $idx;
|
||||
$idx = $pat[$plane]{$pattern};
|
||||
unless( exists($pat[$plane]{$pattern})){
|
||||
$idx = $newidx[$plane]++;
|
||||
$patarray[$plane][$idx] = $pattern;
|
||||
$pat[$plane]{$pattern} = $idx;
|
||||
}
|
||||
|
||||
printf OUT " %3d, // U+%04X - U+%04X : %s\n" ,
|
||||
printf OUT " %3d, // U+%06X - U+%06X : %s\n" ,
|
||||
$idx, ($i << 3),((($i +1)<< 3)-1), $pattern ;
|
||||
|
||||
|
||||
|
@ -258,30 +312,31 @@ for($t = 1; $t <= $tt; $t++)
|
|||
$pattern = sprintf("0x%08X", $data);
|
||||
if($data ne 0)
|
||||
{
|
||||
print "WARNING, Unicode Database now contain characters" .
|
||||
"which we have not consider, change this program !!!\n\n";
|
||||
printf "Problem- U+%04X - U+%04X range\n", ($i << 3),((($i +1)<< 3)-1);
|
||||
print "WARNING, Unicode Database now contain characters " .
|
||||
"which we have not considered. change this program !!!\n\n";
|
||||
printf "Problem- U+%06X - U+%06X range\n", ($i << 3),((($i +1)<< 3)-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if($newidx > 255)
|
||||
{
|
||||
die "We have more than 255 patterns !!! - $newidx\n\n" .
|
||||
"This program is now broken!!!\n\n\n";
|
||||
|
||||
printf STDERR "Plane %d has %d patterns\n", $plane, $newidx[$plane];
|
||||
if ($newidx[$plane] > 256) {
|
||||
printf STDERR "We have more than 256 patterns for plane %d\n", $plane;
|
||||
die "This program is now broken!!!\n\n\n";
|
||||
}
|
||||
printf OUT "static const PRUint32 gGenCatPat[$newidx] = {\n";
|
||||
for($i = 0 ; $i < $newidx; $i++)
|
||||
{
|
||||
printf OUT " %s, // $i \n", $patarray[$i] ;
|
||||
}
|
||||
printf OUT "};\n\n";
|
||||
$totaldata += $newidx * 4;
|
||||
|
||||
printf OUT "static PRUint8 GetCat(PRUnichar u)\n{\n";
|
||||
# Emit one PRUint32 pattern table per plane.  Each entry packs the category
# nibbles of eight consecutive code points; the generated GetCat() extracts
# one with (pat >> ((u % 8) * 4)) & 0x0F.
for $plane (@planes) {
  # Size the array from this plane's own pattern count.  The pattern counter
  # is kept per plane in @newidx; the bare scalar $newidx is not that counter
  # and must not be used for the array bound.
  printf OUT "static const PRUint32 gGenCatPatPl%d[%d] = {\n",
             $plane, $newidx[$plane];
  for($i = 0 ; $i < $newidx[$plane]; $i++)
  {
    printf OUT " %s, // $i \n", $patarray[$plane][$i] ;
  }
  printf OUT "}; \n\n";
  # Account for the table in the running size total (4 bytes per entry).
  $totaldata += $newidx[$plane] * 4;
}
|
||||
|
||||
printf OUT "static PRUint8 GetCat(PRUint32 u)\n{\n";
|
||||
printf OUT " PRUint32 pat;\n";
|
||||
printf OUT " //\n";
|
||||
printf OUT " // Handle block which use index table mapping \n";
|
||||
|
@ -290,9 +345,12 @@ for($t = 1; $t <= $tt; $t++)
|
|||
{
|
||||
$tl = $range[($t-1) * 2];
|
||||
$th = $range[($t-1) * 2 + 1];
|
||||
printf OUT " // Handle U+%04X to U+%04X\n", $tl, $th;
|
||||
printf OUT " if((((PRUnichar)0x%04X)<=u)&&(u<=((PRUnichar)0x%04X))) {\n", $tl, $th;
|
||||
printf OUT " pat = gGenCatPat[gGenCatIdx%d [( u -(PRUnichar) 0x%04X ) / 8]];\n", $t, $tl;
|
||||
$plane = $tl >> 16;
|
||||
printf OUT " // Handle U+%06X to U+%06X\n", $tl, $th;
|
||||
printf OUT " if(0x%06X <= u && u <= 0x%06X) {\n", $tl, $th;
|
||||
printf OUT " pat = " .
|
||||
"gGenCatPatPl%d[gGenCatIdx%d [( u - 0x%06X ) / 8]];\n",
|
||||
$plane, $t, $tl;
|
||||
printf OUT " return (pat >> ((u % 8) * 4)) & 0x0F;\n";
|
||||
printf OUT " }\n\n";
|
||||
}
|
||||
|
@ -302,10 +360,10 @@ printf OUT " // Handle blocks which share the same category \n";
|
|||
printf OUT " //\n";
|
||||
|
||||
|
||||
@special = keys(%sh);
|
||||
foreach $s ( sort(@special) ) {
|
||||
printf OUT " // Handle %s block \n", substr($s, 1,-1);
|
||||
printf OUT " if((((PRUnichar)0x%s)<=u)&&(u<=((PRUnichar)0x%s))) \n", $sl{$s}, $sh{$s};
|
||||
#@special = keys(%sh);
|
||||
foreach $s ( @special ) {
|
||||
printf OUT " // Handle %s block \n", $s;
|
||||
printf OUT " if(0x%s <= u && u <= 0x%s) \n", $sl{$s}, $sh{$s};
|
||||
printf OUT " return $sc{$s}; \n\n";
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче