зеркало из https://github.com/mozilla/pjs.git
429 строки
14 KiB
Perl
429 строки
14 KiB
Perl
#!/usr/bin/perl
|
|
#
|
|
# ***** BEGIN LICENSE BLOCK *****
|
|
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
#
|
|
# The contents of this file are subject to the Mozilla Public License Version
|
|
# 1.1 (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
# http://www.mozilla.org/MPL/
|
|
#
|
|
# Software distributed under the License is distributed on an "AS IS" basis,
|
|
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
# for the specific language governing rights and limitations under the
|
|
# License.
|
|
#
|
|
# The Original Code is mozilla.org code.
|
|
#
|
|
# The Initial Developer of the Original Code is
|
|
# Netscape Communications Corporation.
|
|
# Portions created by the Initial Developer are Copyright (C) 1999
|
|
# the Initial Developer. All Rights Reserved.
|
|
#
|
|
# Contributor(s):
|
|
#
|
|
# Alternatively, the contents of this file may be used under the terms of
|
|
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
# in which case the provisions of the GPL or the LGPL are applicable instead
|
|
# of those above. If you wish to allow use of your version of this file only
|
|
# under the terms of either the GPL or the LGPL, and not to allow others to
|
|
# use your version of this file under the terms of the MPL, indicate your
|
|
# decision by deleting the provisions above and replace them with the notice
|
|
# and other provisions required by the GPL or the LGPL. If you do not delete
|
|
# the provisions above, a recipient may use your version of this file under
|
|
# the terms of any one of the MPL, the GPL or the LGPL.
|
|
#
|
|
# ***** END LICENSE BLOCK *****
|
|
|
|
# If you run this script because the Unicode standard has been updated,
|
|
# check xpcom/string/public/nsCharTraits.h to see whether
|
|
# nsCharTraits<PRUnichar>::ASCIIToLower needs to be updated. It only
|
|
# needs to be updated if the Unicode consortium adds (or removes)
|
|
# a Unicode character whose lowercase form is an ASCII character.
|
|
# Currently there are only two such characters: KELVIN SIGN and
|
|
# LATIN CAPITAL LETTER I WITH DOT ABOVE.
|
|
|
|
######################################################################
#
# Initial global variables
#
######################################################################
# %utot maps an upper case code point to its title case code point
# (both kept as the hex strings read from the data file); it is only
# filled for characters whose upper and title case forms differ.
%utot = ();

# $ui / $li are the next free slots in the upper case and lower case
# arrays that the main read loop fills.
$ui = $li = 0;
|
|
######################################################################
#
# Open the unicode database file
#
######################################################################
# Three-argument open keeps the mode separate from the file name, and
# the message reports the system error ($!) so that a permission
# problem is not misreported as a missing file.
open(UNICODATA, '<', 'UnicodeData-Latest.txt')
    or die "cannot open UnicodeData-Latest.txt: $!";
|
|
######################################################################
#
# Open the output file
#
######################################################################
# The generated header is written relative to the current directory,
# so the script has to be run from the directory that contains it.
# The system error ($!) is included so the cause of a failure is visible.
open(OUT, '>', '../src/casetable.h')
    or die "cannot open output ../src/casetable.h file: $!";
|
|
######################################################################
#
# Generate license and header
#
######################################################################
# The heredoc contains no variables, so it is quoted literally.  It is
# the license block plus a "generated file" banner that is copied
# verbatim to the top of the output.
$npl = <<'END_OF_NPL';
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1999
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/unicharutil/tools/gencasetable.pl
*/
END_OF_NPL

# License banner first, then the single include the generated tables need.
print OUT $npl, "#include \"nscore.h\" \n\n";
|
|
######################################################################
#
# Process the file line by line
#
# Every record is one ';'-separated line.  For each record this loop
#   - remembers upper -> title pairs in %utot when the two differ,
#   - appends to the upper case arrays (@ucv, @uv, @ud, @ulastd),
#   - appends to the lower case arrays (@lcv, @lv, @ld, @llastd),
#   - sets a bit in @blk for the 256-character page of any character
#     that has an upper or a lower case mapping.
#
# NOTE(review): code points above 0xFFFF are not filtered out here,
# although the tables written later are declared as 16-bit arrays and
# only eight @blk words are printed -- confirm this is intended for the
# data file in use.
#
######################################################################
while (<UNICODATA>) {
    # Remove only the line terminator (LF or CRLF).  An unconditional
    # chop would eat the last digit of the title case field when the
    # final line of the file has no trailing newline.
    s/\r?\n\z//;
    next unless length;     # an empty line carries no mappings

    ##################################################################
    #
    # Get value from fields
    #
    ##################################################################
    # split drops trailing empty fields, so the case fields may be
    # missing entirely; treat a missing field as an empty one.
    my @f = split(/;/, $_);
    my $c = $f[0];                                # The unicode value
    my $u = defined $f[12] ? $f[12] : "";         # The upper case
    my $l = defined $f[13] ? $f[13] : "";         # The lower case
    my $t = defined $f[14] ? $f[14] : "";         # The title case

    ##################################################################
    #
    # Process title case for this entry
    #
    ##################################################################
    # If upper case is not equal to title case, store the pair in the
    # %utot hash (keyed by the upper case hex string).
    if ($t ne "" && $u ne "" && $u ne $t) {
        $utot{$u} = $t;
    }

    my $cv = hex($c);       # convert the Unicode value into integer

    ##################################################################
    #
    # Process upper case for this entry
    #
    ##################################################################
    if ($u ne "") {         # if upper case exist
        my $uvalue = hex($u);   # convert the upper case value into integer

        # store related information into arrays
        #   @ucv    - unicode value
        #   @uv     - upper case value (debug only)
        #   @ud     - difference between unicode and upper case,
        #             kept modulo 2^16
        #   @ulastd - difference between this unicode value and the
        #             previous one stored in @ucv
        $ucv[$ui] = $cv;
        $uv[$ui]  = $uvalue;
        $ud[$ui]  = 0x0000FFFF & ($uvalue - $cv);

        # The first entry has no predecessor, so it gets no @ulastd.
        if ($ui != 0) {
            $ulastd[$ui] = $cv - $ucv[$ui - 1];
        }
        $ui++;
    }

    ##################################################################
    #
    # Process lower case for this entry
    #
    ##################################################################
    if ($l ne "") {         # if lower case exist
        my $lvalue = hex($l);   # convert the lower case value into integer

        # store related information into arrays
        #   @lcv    - unicode value
        #   @lv     - lower case value (debug only)
        #   @ld     - difference between unicode and lower case,
        #             kept modulo 2^16
        #   @llastd - difference between this unicode value and the
        #             previous one stored in @lcv
        $lcv[$li] = $cv;
        $lv[$li]  = $lvalue;
        $ld[$li]  = 0x0000FFFF & ($lvalue - $cv);

        # The first entry has no predecessor, so it gets no @llastd.
        if ($li != 0) {
            $llastd[$li] = $cv - $lcv[$li - 1];
        }
        $li++;
    }

    # Mark the 256-character page of this code point as containing a
    # case mapping: each @blk word covers 32 pages (0x2000 code points)
    # and each bit inside it stands for one page.
    if ($l ne "" || $u ne "") {     # if lower or upper case exist
        my $idx  = $cv >> 13;
        my $bits = 1 << (($cv >> 8) & 0x1F);
        $blk[$idx] |= $bits;
    }
}
|
|
|
|
######################################################################
#
# Print out all the tables
#
######################################################################

######################################################################
#
# Print out upper to title case mapping
#
# gUpperToTitle is a flat list of (upper case, title case) pairs and
# gUpperToTitleItems is the number of pairs.
#
######################################################################

$ttotal = 0;
print OUT "static const PRUnichar gUpperToTitle[] = { \n";
# Walk the keys in ascending code point order.  Iterating the hash
# directly yields a different order from run to run, which made the
# generated header change even when the input data did not.
foreach my $upper (sort { hex($a) <=> hex($b) } keys %utot) {
    print OUT " 0x" . $upper . ", 0x" . $utot{$upper} . ", \n";
    $ttotal++;
}
print OUT "};\n\n";
print OUT "static const PRUint32 gUpperToTitleItems = $ttotal;\n\n";
|
|
######################################################################
#
# Print out gToUpper table
#
# Consecutive entries of @ucv are merged into ranges.  Every range is
# written as three 16-bit values:
#   From                 - first code point of the range
#   (To << 8) | Every    - To is the offset of the last code point from
#                          From, Every is the step between neighbouring
#                          code points in the range (0 for a range of
#                          one character)
#   Diff                 - the @ud value shared by the whole range
# gToUpperItems is the number of ranges written.
#
######################################################################
print OUT "static const PRUint16 gToUpper[] = \n";
print OUT "{ /* From To Every Diff */ \n";
$utotal = 0;
$ufrom  = 0;    # remember the start of the output item

# Nothing beyond the two header lines is written when there is no
# upper case data at all.
if (@ucv) {
    # Open the first range.
    $ufrom = $ucv[0];
    printf OUT " 0x%04x, ", $ufrom;

    for my $i (1 .. $#ucv) {
        my $prev = $i - 1;

        # True once the current range holds more than one code point.
        my $multi = ($ufrom != $ucv[$prev]);

        # Entry $i continues the current range unless its case
        # difference changes, or the range already has a step and the
        # step to this entry differs from it.
        next unless ($ud[$i] != $ud[$prev])
                 || ($multi && $ulastd[$i] != $ulastd[$prev]);

        # Close the range that ends at the previous entry.
        my $span = $ucv[$prev] - $ufrom;
        $every = $multi ? $ulastd[$prev] : 0;
        printf OUT "((0x%02x << 8) | 0x%02x), 0x%04x ,\n",
            $span, $every, $ud[$prev];

        # To and Every are packed into one byte each; report anything
        # that does not fit (the table row is still written).
        if ($span > 255 || $every > 255) {
            print "WARNING!!! cannot handle block > 255 chars (Upper)\n\n";
            printf "0x%04X, 0x%04x, 0x%04x, 0x%04x \n",
                $ufrom, $ucv[$prev], $every, $ud[$prev];
        }

        # Open the next range at the current entry.
        $ufrom = $ucv[$i];      # update the start of the item
        printf OUT " 0x%04x, ", $ufrom;
        $utotal++;
    }

    # Close the last range and the array.  The last range always uses
    # the step recorded for the final entry.
    my $last = $#ucv;
    printf OUT "((0x%02x << 8) | 0x%02x), 0x%04x \n};\n\n",
        ($ucv[$last] - $ufrom), $ulastd[$last], $ud[$last];
    $utotal++;
    print OUT "static const PRUint32 gToUpperItems = $utotal;\n\n";
}
|
|
######################################################################
#
# Print out gToLower table
#
# Consecutive entries of @lcv are merged into ranges.  Every range is
# written as three 16-bit values:
#   From                 - first code point of the range
#   (To << 8) | Every    - To is the offset of the last code point from
#                          From, Every is the step between neighbouring
#                          code points in the range (0 for a range of
#                          one character)
#   Diff                 - the @ld value shared by the whole range
# gToLowerItems is the number of ranges written.
#
######################################################################
print OUT "static const PRUint16 gToLower[] = \n";
print OUT "{ /* From To Every Diff */ \n";
$ltotal = 0;
$lfrom  = 0;    # remember the start of the output item

# Nothing beyond the two header lines is written when there is no
# lower case data at all.
if (@lcv) {
    # Open the first range.
    $lfrom = $lcv[0];
    printf OUT " 0x%04x, ", $lfrom;

    for my $cur (1 .. $#lcv) {
        my $prev = $cur - 1;

        # True once the current range holds more than one code point.
        my $multi = ($lfrom != $lcv[$prev]);

        # Entry $cur continues the current range unless its case
        # difference changes, or the range already has a step and the
        # step to this entry differs from it.
        next unless ($ld[$cur] != $ld[$prev])
                 || ($multi && $llastd[$cur] != $llastd[$prev]);

        # Close the range that ends at the previous entry.
        my $span = $lcv[$prev] - $lfrom;
        $every = $multi ? $llastd[$prev] : 0;
        printf OUT "((0x%02x << 8) | 0x%02x), 0x%04x ,\n",
            $span, $every, $ld[$prev];

        # To and Every are packed into one byte each; report anything
        # that does not fit (the table row is still written).
        if ($span > 255 || $every > 255) {
            print "WARNING!!! cannot handle block > 255 chars (Lower)\n\n";
            printf "0x%04X, 0x%04x, 0x%04x, 0x%04x \n",
                $lfrom, $lcv[$prev], $every, $ld[$prev];
        }

        # Open the next range at the current entry.
        $lfrom = $lcv[$cur];    # update the start of the item
        printf OUT " 0x%04x, ", $lfrom;
        $ltotal++;
    }

    # Close the last range and the array.  The last range always uses
    # the step recorded for the final entry.
    my $last = $#lcv;
    printf OUT "((0x%02x << 8) | 0x%02x), 0x%04x \n};\n\n",
        ($lcv[$last] - $lfrom), $llastd[$last], $ld[$last];
    $ltotal++;
    print OUT "static const PRUint32 gToLowerItems = $ltotal;\n\n";
}
|
|
# gCaseBlocks: eight 32-bit words taken from @blk, one per line.  Each
# set bit marks a 256-character page that holds at least one character
# with an upper or lower case mapping.  Words that were never touched
# are printed as zero.
print OUT "static const PRUint32 gCaseBlocks [8] = {\n";
print OUT join(",\n", map { sprintf("0x%08X", $blk[$_]) } 0 .. 7), "\n";
print OUT "};\n\n";
|
|
######################################################################
#
# Print out gASCIIToLower table
#
######################################################################
print OUT "// We map x -> x, except for upper-case letters,\n";
print OUT "// which we map to their lower-case equivalents.\n";
print OUT "static const PRUint8 gASCIIToLower [128] = {\n";

# Eight rows of sixteen entries.  Entry x holds x itself, except for
# 'A'..'Z' (65..90), which hold the matching lower case letter.
for my $row (0 .. 7) {
    my $first = $row * 16;
    my @cells = map {
        my $mapped = ($_ >= 65 && $_ <= 90) ? $_ + 0x20 : $_;
        sprintf("0x%02x", $mapped);
    } $first .. $first + 15;

    # Every row, including the last one, ends with a trailing comma.
    print OUT " ", join(", ", @cells), ",\n";
}
print OUT "};\n";
|
|
|
|
######################################################################
#
# Close files
#
######################################################################
# The input handle is UNICODATA; the previous close named a handle that
# was never opened, so the data file was only closed at program exit.
close(UNICODATA);

# Buffered write errors (for example a full disk) only show up when the
# handle is closed, so fail loudly instead of leaving a truncated header.
close(OUT)
    or die "cannot finish writing ../src/casetable.h: $!";
|