зеркало из https://github.com/mozilla/gecko-dev.git
check in bidi tools and data files for smontagu@il.ibm.com
r=ftang@netscape.com sr=erik@netscape.com b=62777 there is no direct connection to the build system, so it won't impact any build status on tinderbox
This commit is contained in:
Родитель
e3864a18c9
Коммит
2e64c3a69c
|
@ -0,0 +1,238 @@
|
|||
# BidiMirroring-1.txt
|
||||
#
|
||||
# This file is an informative supplement to the UnicodeData file. It
|
||||
# lists characters that have the mirrored property
|
||||
# where there is another Unicode character that typically has a glyph
|
||||
# that is the mirror image of the original character's glyph.
|
||||
# The repertoire covered by the file is Unicode 3.0.1.
|
||||
#
|
||||
# The file contains a list of lines with mappings from one code point
|
||||
# to another one for character-based mirroring.
|
||||
# Note that for "real" mirroring, a rendering engine needs to select
|
||||
# appropriate alternative glyphs, and that many Unicode characters do not
|
||||
# have a mirror-image Unicode character.
|
||||
#
|
||||
# Each mapping line contains two fields, separated by a semicolon (';').
|
||||
# Each of the two fields contains a code point represented as a
|
||||
# variable-length hexadecimal value with 4 to 6 digits.
|
||||
# A comment indicates where the characters are "BEST FIT" mirroring.
|
||||
#
|
||||
# Code points with the "mirrored" property but no appropriate mirrors are
|
||||
# listed as comments at the end of the file.
|
||||
#
|
||||
# For information on bidi mirroring, see UTR #9: Bidirectional Algorithm,
|
||||
# at http://www.unicode.org/unicode/reports/tr9/
|
||||
#
|
||||
# Please address any comments to <errata@unicode.org>.
|
||||
# Note that this is an archival address: messages will be checked,
|
||||
# but do not expect an immediate response.
|
||||
#
|
||||
# This file was originally created by Markus Scherer
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
0028; 0029 # LEFT PARENTHESIS
|
||||
0029; 0028 # RIGHT PARENTHESIS
|
||||
003C; 003E # LESS-THAN SIGN
|
||||
003E; 003C # GREATER-THAN SIGN
|
||||
005B; 005D # LEFT SQUARE BRACKET
|
||||
005D; 005B # RIGHT SQUARE BRACKET
|
||||
007B; 007D # LEFT CURLY BRACKET
|
||||
007D; 007B # RIGHT CURLY BRACKET
|
||||
00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
|
||||
2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL
|
||||
207D; 207E # SUPERSCRIPT LEFT PARENTHESIS
|
||||
207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS
|
||||
208D; 208E # SUBSCRIPT LEFT PARENTHESIS
|
||||
208E; 208D # SUBSCRIPT RIGHT PARENTHESIS
|
||||
2208; 220B # ELEMENT OF
|
||||
2209; 220C # NOT AN ELEMENT OF
|
||||
220A; 220D # SMALL ELEMENT OF
|
||||
220B; 2208 # CONTAINS AS MEMBER
|
||||
220C; 2209 # DOES NOT CONTAIN AS MEMBER
|
||||
220D; 220A # SMALL CONTAINS AS MEMBER
|
||||
223C; 223D # TILDE OPERATOR
|
||||
223D; 223C # REVERSED TILDE
|
||||
2243; 22CD # ASYMPTOTICALLY EQUAL TO
|
||||
2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
|
||||
2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO
|
||||
2254; 2255 # COLON EQUALS
|
||||
2255; 2254 # EQUALS COLON
|
||||
2264; 2265 # LESS-THAN OR EQUAL TO
|
||||
2265; 2264 # GREATER-THAN OR EQUAL TO
|
||||
2266; 2267 # LESS-THAN OVER EQUAL TO
|
||||
2267; 2266 # GREATER-THAN OVER EQUAL TO
|
||||
2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
|
||||
2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
|
||||
226A; 226B # MUCH LESS-THAN
|
||||
226B; 226A # MUCH GREATER-THAN
|
||||
226E; 226F # [BEST FIT] NOT LESS-THAN
|
||||
226F; 226E # [BEST FIT] NOT GREATER-THAN
|
||||
2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
|
||||
2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
|
||||
2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO
|
||||
2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
|
||||
2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
|
||||
2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
|
||||
2276; 2277 # LESS-THAN OR GREATER-THAN
|
||||
2277; 2276 # GREATER-THAN OR LESS-THAN
|
||||
2278; 2279 # NEITHER LESS-THAN NOR GREATER-THAN
|
||||
2279; 2278 # NEITHER GREATER-THAN NOR LESS-THAN
|
||||
227A; 227B # PRECEDES
|
||||
227B; 227A # SUCCEEDS
|
||||
227C; 227D # PRECEDES OR EQUAL TO
|
||||
227D; 227C # SUCCEEDS OR EQUAL TO
|
||||
227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO
|
||||
227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO
|
||||
2280; 2281 # [BEST FIT] DOES NOT PRECEDE
|
||||
2281; 2280 # [BEST FIT] DOES NOT SUCCEED
|
||||
2282; 2283 # SUBSET OF
|
||||
2283; 2282 # SUPERSET OF
|
||||
2284; 2285 # [BEST FIT] NOT A SUBSET OF
|
||||
2285; 2284 # [BEST FIT] NOT A SUPERSET OF
|
||||
2286; 2287 # SUBSET OF OR EQUAL TO
|
||||
2287; 2286 # SUPERSET OF OR EQUAL TO
|
||||
2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO
|
||||
2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO
|
||||
228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO
|
||||
228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO
|
||||
228F; 2290 # SQUARE IMAGE OF
|
||||
2290; 228F # SQUARE ORIGINAL OF
|
||||
2291; 2292 # SQUARE IMAGE OF OR EQUAL TO
|
||||
2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO
|
||||
22A2; 22A3 # RIGHT TACK
|
||||
22A3; 22A2 # LEFT TACK
|
||||
22B0; 22B1 # PRECEDES UNDER RELATION
|
||||
22B1; 22B0 # SUCCEEDS UNDER RELATION
|
||||
22B2; 22B3 # NORMAL SUBGROUP OF
|
||||
22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP
|
||||
22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO
|
||||
22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
|
||||
22B6; 22B7 # ORIGINAL OF
|
||||
22B7; 22B6 # IMAGE OF
|
||||
22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
|
||||
22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
|
||||
22CB; 22CC # LEFT SEMIDIRECT PRODUCT
|
||||
22CC; 22CB # RIGHT SEMIDIRECT PRODUCT
|
||||
22CD; 2243 # REVERSED TILDE EQUALS
|
||||
22D0; 22D1 # DOUBLE SUBSET
|
||||
22D1; 22D0 # DOUBLE SUPERSET
|
||||
22D6; 22D7 # LESS-THAN WITH DOT
|
||||
22D7; 22D6 # GREATER-THAN WITH DOT
|
||||
22D8; 22D9 # VERY MUCH LESS-THAN
|
||||
22D9; 22D8 # VERY MUCH GREATER-THAN
|
||||
22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN
|
||||
22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN
|
||||
22DC; 22DD # EQUAL TO OR LESS-THAN
|
||||
22DD; 22DC # EQUAL TO OR GREATER-THAN
|
||||
22DE; 22DF # EQUAL TO OR PRECEDES
|
||||
22DF; 22DE # EQUAL TO OR SUCCEEDS
|
||||
22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL
|
||||
22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL
|
||||
22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO
|
||||
22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO
|
||||
22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO
|
||||
22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO
|
||||
22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO
|
||||
22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO
|
||||
22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO
|
||||
22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO
|
||||
22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF
|
||||
22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP
|
||||
22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO
|
||||
22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
|
||||
22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS
|
||||
22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS
|
||||
2308; 2309 # LEFT CEILING
|
||||
2309; 2308 # RIGHT CEILING
|
||||
230A; 230B # LEFT FLOOR
|
||||
230B; 230A # RIGHT FLOOR
|
||||
2329; 232A # LEFT-POINTING ANGLE BRACKET
|
||||
232A; 2329 # RIGHT-POINTING ANGLE BRACKET
|
||||
3008; 3009 # LEFT ANGLE BRACKET
|
||||
3009; 3008 # RIGHT ANGLE BRACKET
|
||||
300A; 300B # LEFT DOUBLE ANGLE BRACKET
|
||||
300B; 300A # RIGHT DOUBLE ANGLE BRACKET
|
||||
300C; 300D # [BEST FIT] LEFT CORNER BRACKET
|
||||
300D; 300C # [BEST FIT] RIGHT CORNER BRACKET
|
||||
300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET
|
||||
300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET
|
||||
3010; 3011 # LEFT BLACK LENTICULAR BRACKET
|
||||
3011; 3010 # RIGHT BLACK LENTICULAR BRACKET
|
||||
3014; 3015 # [BEST FIT] LEFT TORTOISE SHELL BRACKET
|
||||
3015; 3014 # [BEST FIT] RIGHT TORTOISE SHELL BRACKET
|
||||
3016; 3017 # LEFT WHITE LENTICULAR BRACKET
|
||||
3017; 3016 # RIGHT WHITE LENTICULAR BRACKET
|
||||
3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET
|
||||
3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET
|
||||
301A; 301B # LEFT WHITE SQUARE BRACKET
|
||||
301B; 301A # RIGHT WHITE SQUARE BRACKET
|
||||
|
||||
# The following characters have no appropriate mirroring character
|
||||
|
||||
# 2201; COMPLEMENT
|
||||
# 2202; PARTIAL DIFFERENTIAL
|
||||
# 2203; THERE EXISTS
|
||||
# 2204; THERE DOES NOT EXIST
|
||||
# 2211; N-ARY SUMMATION
|
||||
# 2215; DIVISION SLASH
|
||||
# 2216; SET MINUS
|
||||
# 221A; SQUARE ROOT
|
||||
# 221B; CUBE ROOT
|
||||
# 221C; FOURTH ROOT
|
||||
# 221D; PROPORTIONAL TO
|
||||
# 221F; RIGHT ANGLE
|
||||
# 2220; ANGLE
|
||||
# 2221; MEASURED ANGLE
|
||||
# 2222; SPHERICAL ANGLE
|
||||
# 2224; DOES NOT DIVIDE
|
||||
# 2226; NOT PARALLEL TO
|
||||
# 222B; INTEGRAL
|
||||
# 222C; DOUBLE INTEGRAL
|
||||
# 222D; TRIPLE INTEGRAL
|
||||
# 222E; CONTOUR INTEGRAL
|
||||
# 222F; SURFACE INTEGRAL
|
||||
# 2230; VOLUME INTEGRAL
|
||||
# 2231; CLOCKWISE INTEGRAL
|
||||
# 2232; CLOCKWISE CONTOUR INTEGRAL
|
||||
# 2233; ANTICLOCKWISE CONTOUR INTEGRAL
|
||||
# 2239; EXCESS
|
||||
# 223B; HOMOTHETIC
|
||||
# 223E; INVERTED LAZY S
|
||||
# 223F; SINE WAVE
|
||||
# 2240; WREATH PRODUCT
|
||||
# 2241; NOT TILDE
|
||||
# 2242; MINUS TILDE
|
||||
# 2244; NOT ASYMPTOTICALLY EQUAL TO
|
||||
# 2245; APPROXIMATELY EQUAL TO
|
||||
# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO
|
||||
# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
|
||||
# 2248; ALMOST EQUAL TO
|
||||
# 2249; NOT ALMOST EQUAL TO
|
||||
# 224A; ALMOST EQUAL OR EQUAL TO
|
||||
# 224B; TRIPLE TILDE
|
||||
# 224C; ALL EQUAL TO
|
||||
# 225F; QUESTIONED EQUAL TO
|
||||
# 2260; NOT EQUAL TO
|
||||
# 2262; NOT IDENTICAL TO
|
||||
# 228C; MULTISET
|
||||
# 2298; CIRCLED DIVISION SLASH
|
||||
# 22A6; ASSERTION
|
||||
# 22A7; MODELS
|
||||
# 22A8; TRUE
|
||||
# 22A9; FORCES
|
||||
# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE
|
||||
# 22AB; DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
|
||||
# 22AC; DOES NOT PROVE
|
||||
# 22AD; NOT TRUE
|
||||
# 22AE; DOES NOT FORCE
|
||||
# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
|
||||
# 22B8; MULTIMAP
|
||||
# 22BE; RIGHT ANGLE WITH ARC
|
||||
# 22BF; RIGHT TRIANGLE
|
||||
# 2320; TOP HALF INTEGRAL
|
||||
# 2321; BOTTOM HALF INTEGRAL
|
|
@ -0,0 +1,345 @@
|
|||
#!/usr/local/bin/perl
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public
|
||||
# License Version 1.1 (the "License"); you may not use this file
|
||||
# except in compliance with the License. You may obtain a copy of
|
||||
# the License at http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS
|
||||
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
# implied. See the License for the specific language governing
|
||||
# rights and limitations under the License.
|
||||
#
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is IBM
|
||||
# Corporation. Portions created by IBM are
|
||||
# Copyright (C) 2000 IBM Corporation. All
|
||||
# Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
#
|
||||
|
||||
######################################################################
|
||||
#
|
||||
# Initial global variable
|
||||
#
|
||||
######################################################################
|
||||
|
||||
# Bookkeeping tables; every one of them starts out empty.
%special = ();
%pat     = ();    # packed pattern string => index of that pattern
%gcount  = ();    # bidi category abbreviation => number of characters seen

# Numeric code for each bidi category abbreviation used in the Unicode
# database.  The codes are stored as strings.
%map = (
    'L'   => '1',     # Left-to-Right
    'R'   => '2',     # Right-to-Left
    'AL'  => '3',     # Right-to-Left Arabic
    'AN'  => '4',     # Arabic Number
    'EN'  => '5',     # European Number
    'ES'  => '6',     # European Number Separator
    'ET'  => '7',     # European Number Terminator
    'CS'  => '8',     # Common Number Separator
    'ON'  => '9',     # Other Neutrals
    'NSM' => '10',    # Non-Spacing Mark
    'BN'  => '11',    # Boundary Neutral
    'B'   => '12',    # Paragraph Separator
    'S'   => '13',    # Segment Separator
    'WS'  => '14',    # Whitespace
);

# The explicit formatting codes all share the single code 15:
#   LRE  Left-to-Right Embedding      RLE  Right-to-Left Embedding
#   LRO  Left-to-Right Override       RLO  Right-to-Left Override
#   PDF  Pop Directional Format
foreach my $format_code ('LRE', 'RLE', 'PDF', 'LRO', 'RLO') {
    $map{$format_code} = '15';
}
|
||||
|
||||
######################################################################
|
||||
#
|
||||
# Open the unicode database file
|
||||
#
|
||||
######################################################################
|
||||
# Input: the Unicode character database, expected in the current directory.
# The three-argument form of open keeps the mode separate from the file
# name, and $! is reported so that (for example) a permission problem is
# not mistaken for a missing file.
open(UNICODATA, "<", "UnicodeData-Latest.txt")
    || die "cannot find UnicodeData-Latest.txt: $!";

######################################################################
#
# Open the output file
#
######################################################################
# Output: the generated C header, written relative to the current directory.
open(OUT, ">", "../src/bidicattable.h")
    || die "cannot open output ../src/bidicattable.h file: $!";
|
||||
|
||||
######################################################################
|
||||
#
|
||||
# Generate license and header
|
||||
#
|
||||
######################################################################
|
||||
# Licence block and "generated file" banner written at the top of the header.
$npl = <<"END_OF_NPL";
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "MPL"); you may not use this file except in
* compliance with the MPL. You may obtain a copy of the MPL at
* http://www.mozilla.org/MPL/
*
* Software distributed under the MPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
* for the specific language governing rights and limitations under the
* MPL.
*
* The Initial Developer of the Original Code is IBM
* Corporation. Portions created by IBM are
* Copyright (C) 2000 IBM Corporation. All
* Rights Reserved.
*/
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/unicharutil/tools/genbidicattable.pl
*/
END_OF_NPL

# Banner first, then the conditional-compilation guard and the one include.
print OUT $npl,
          "\n#ifdef IBMBIDI\n\n",
          qq{#include "nscore.h" \n\n};

# Tables filled in while the database is read.
%sc           = ();    # special block name => numeric category code
%sl           = ();    # special block name => first code point of the block
%sh           = ();    # special block name => last code point of the block
%bidicategory = ();    # code point (hex string) => bidi category abbreviation
|
||||
|
||||
######################################################################
|
||||
#
|
||||
# Process the file line by line
|
||||
#
|
||||
######################################################################
|
||||
# Read the Unicode database one record per line and record the bidi
# category of every character.  Ordinary characters go into %bidicategory;
# range markers (names of the form "<Something, First>" and
# "<Something, Last>") are recorded in %sl / %sh / %sc instead.
while (<UNICODATA>) {
    # chomp removes only a trailing newline; chop would also eat a real
    # character from a final line that lacks a line terminator.
    chomp;

    ######################################################################
    #
    # Get value from fields
    #
    ######################################################################
    # Lexical variables are used here so that sort's package variable $b
    # is no longer overwritten.
    my @field      = split(/;/, $_);
    my $code_point = $field[0];    # The unicode value
    my $name       = $field[1];    # The unicode name
    my $bidi       = $field[4];    # The Bidi Category

    # Blank or truncated lines carry no character information.
    next unless defined $name;

    if ((substr($name, 0, 1) ne "<") || ($name eq "<control>")) {
        $gcount{$bidi}++;
        $bidicategory{$code_point} = $bidi;
    }
    else {
        # Handle special block: the name splits into the block label
        # (still carrying its leading "<") and "First>" or "Last>".
        my @pair   = split(/, /, $name);
        my $catnum = $map{$bidi};

        if ($pair[1] eq "First>") {
            $sl{$pair[0]} = $code_point;
            $sc{$pair[0]} = $catnum;
        }
        elsif ($pair[1] eq "Last>") {
            $sh{$pair[0]} = $code_point;
            # Both ends of a block must agree on the category.
            if ($sc{$pair[0]} ne $catnum) {
                print "WARNING !!!! error in handling special block\n\n";
            }
        }
        else {
            print "WARNING !!!! error in handling special block\n\n";
        }
    }
}
|
||||
|
||||
# XXX - How can this be made more flexible as new blocks are added to the UCDB?
|
||||
|
||||
# Code point ranges covered by index tables, as inclusive first/last pairs.
@range = (
    0x0000, 0x07ff,
    0x0900, 0x18ff,
    0x1e00, 0x28ff,
    0x2e80, 0x33ff,
    0xa000, 0xa4ff,
    0xf900, 0xffff,
);

@patarray  = ();
$totaldata = 0;
$tt        = scalar(@range) / 2;    # number of first/last pairs

# Seed the pattern table with the sixteen uniform patterns (0x00000000,
# 0x11111111, ... 0xFFFFFFFF) so that they occupy indexes 0 to 15.  Many
# groups of eight characters share one category, and the generated lookup
# code returns such an index directly instead of unpacking a pattern.
foreach my $nibble (0 .. 15) {
    my $digit = sprintf("%X", $nibble);
    $pattern = "0x" . ($digit x 8);
    $pat{$pattern}     = $nibble;
    $patarray[$nibble] = $pattern;
}

# First index available for a non-uniform pattern.
$newidx = 0x10;
|
||||
|
||||
# Emit one index table (gBidiCatIdx<N>) per range in @range.  Every table
# entry describes a group of eight consecutive code points: the eight 4-bit
# category codes are packed into one 32-bit pattern (lowest code point in
# the lowest nibble) and the entry stores the index of that pattern in
# @patarray.  New patterns are appended to @patarray as they are met.
#
# After each range except the last, the gap up to the next range is scanned
# and a warning is printed if the database assigns characters there.
for (my $t = 1; $t <= $tt; $t++) {
    my $tl      = $range[($t - 1) * 2];
    my $th      = $range[($t - 1) * 2 + 1];
    my $entries = (($th - $tl) >> 3) + 1;

    $totaldata += $entries;
    printf OUT "static PRUint8 gBidiCatIdx%d[%d] = {\n", $t, $entries;

    for (my $group = ($tl >> 3); $group <= ($th >> 3); $group++) {
        my $first = $group << 3;    # first code point of this group
        my $data  = 0;

        for my $slot (0 .. 7) {
            my $code = $first + $slot;

            #defaults for unassigned characters -- see table 3.7 in the Unicode Bidi Algorithm
            my $default;
            if (   (($code >= 0x0590) && ($code <= 0x05FF))
                || (($code >= 0xFB1D) && ($code <= 0xFB4F)))
            {
                $default = $map{"R"};
            }
            elsif ((($code >= 0x0600) && ($code <= 0x07BF))
                || (($code >= 0xFB50) && ($code <= 0xFDFF))
                || (($code >= 0xFE70) && ($code <= 0xFEFF)))
            {
                $default = $map{"AL"};
            }
            else {
                $default = $map{"L"};
            }

            # A missing or empty category means the code point is unassigned.
            my $cat   = $bidicategory{ sprintf("%04X", $code) };
            my $value = ($cat eq "") ? $default : $map{$cat};
            $data += $value << (4 * $slot);
        }

        my $pattern = sprintf("0x%08X", $data);

        # Reuse the index of a known pattern, otherwise allocate the next one.
        my $idx;
        if (exists $pat{$pattern}) {
            $idx = $pat{$pattern};
        }
        else {
            $idx            = $newidx++;
            $patarray[$idx] = $pattern;
            $pat{$pattern}  = $idx;
        }

        printf OUT " %3d, /* U+%04X - U+%04X : %s */\n",
            $idx, $first, $first + 7, $pattern;
    }
    printf OUT "};\n\n";

    # The last range has no following gap to check.
    next if $t == $tt;

    my $gap_first = $th + 1;
    my $gap_last  = $range[$t * 2] - 1;
    for (my $group = ($gap_first >> 3); $group <= ($gap_last >> 3); $group++) {
        my $first = $group << 3;
        my $data  = 0;

        for my $slot (0 .. 7) {
            my $cat = $bidicategory{ sprintf("%04X", $first + $slot) };
            if ($cat ne "") {
                $data += $map{$cat} << (4 * $slot);
            }
        }

        # Any non-zero nibble means a character is assigned outside the
        # ranges this program generates tables for.
        if ($data != 0) {
            print "WARNING, Unicode Database now contain characters " .
                  "which we have not consider, change this program !!!\n\n";
            printf "Problem- U+%04X - U+%04X range\n", $first, $first + 7;
        }
    }
}
|
||||
|
||||
|
||||
# Give up if the number of patterns has grown past 255.
die "We have more than 255 patterns !!! - $newidx\n\n" .
    "This program is now broken!!!\n\n\n"
    if $newidx > 255;

# Emit the table of packed patterns, one entry per line, each annotated
# with its index.
printf OUT "static PRUint32 gBidiCatPat[$newidx] = {\n";
foreach my $n (0 .. $newidx - 1) {
    printf OUT " %s, /* $n */\n", $patarray[$n];
}
printf OUT "};\n\n";

# Each pattern is counted as four bytes of data.
$totaldata += $newidx * 4;
|
||||
|
||||
# Emit the opening of the generated GetBidiCat() function followed by one
# lookup branch per range in @range.
#
# Fixed text is written with print rather than printf: one of the lines
# contains the C expression "(u % 8)", and a literal "%" must not be handed
# to printf as part of a format string.
print OUT "static eBidiCategory GetBidiCat(PRUnichar u)\n{\n";
print OUT " PRUint32 pat;\n";
print OUT " PRUint16 patidx;\n\n";
print OUT " /* Handle blocks which use index table mapping */ \n\n";

for ($t = 1; $t <= $tt; $t++) {
    $tl = $range[($t - 1) * 2];
    $th = $range[($t - 1) * 2 + 1];

    # The comment line is the same for all three shapes of range test.
    printf OUT " /* Handle U+%04X to U+%04X */\n", $tl, $th;

    if ($tl == 0) {
        # Range starts at zero: only an upper bound is tested and the
        # code point indexes the table directly.
        printf OUT " if (u<=((PRUnichar)0x%04X)) {\n", $th;
        printf OUT " patidx = gBidiCatIdx%d [( u >> 3 )];\n", $t;
    }
    elsif ($th == 0xFFFF) {
        # Range runs to U+FFFF: only a lower bound is tested.
        printf OUT " if (((PRUnichar)0x%04X)<=u) {\n", $tl;
        printf OUT " patidx = gBidiCatIdx%d [( (u -(PRUnichar) 0x%04X) >> 3 )];\n", $t, $tl;
    }
    else {
        printf OUT " if ((((PRUnichar)0x%04X)<=u)&&(u<=((PRUnichar)0x%04X))) {\n", $tl, $th;
        printf OUT " patidx = gBidiCatIdx%d [( (u -(PRUnichar) 0x%04X) >> 3 )];\n", $t, $tl;
    }

    # Indexes below 0x10 are returned directly as the category; any other
    # index selects a packed pattern from which one nibble is extracted.
    print OUT " if (patidx < 0x10)\n";
    print OUT " return (eBidiCategory)patidx;\n";
    print OUT " else {\n";
    print OUT " pat = gBidiCatPat[patidx];\n";
    print OUT " return (eBidiCategory)((pat >> ((u % 8) * 4)) & 0x0F);\n";
    print OUT " }\n";
    print OUT " }\n\n";
}
|
||||
|
||||
# Emit a range test for every special block whose category differs from L,
# then the fall-through return and the closing lines of the header.
@special = keys(%sh);
$sp = 0;
foreach my $block_name (sort(@special)) {
    # don't bother to define the special blocks unless they have a different
    # value from the default they would be given if they were undefined
    next if $sc{$block_name} == $map{"L"};

    # One-time setup before the first block that is actually written.
    # NOTE(review): several keys of %map share one value, so the reversed
    # hash keeps only one name for that value.
    if ($sp == 0) {
        %by_value = reverse %map;
        printf OUT " /* Handle blocks which share the same category */\n\n";
    }
    $sp++;

    # The stored block name still carries its leading "<"; drop it.
    my $label    = substr($block_name, 1);
    my $category = $by_value{$sc{$block_name}};

    printf OUT " /* Handle %s block */\n", $label;
    printf OUT " if((((PRUnichar)0x%s)<=u)&&(u<=((PRUnichar)0x%s))) \n",
        $sl{$block_name}, $sh{$block_name};
    printf OUT " return eBidiCat_$category; \n\n";
}

# Anything not matched above is reported as L.
printf OUT " return eBidiCat_L; /* UNDEFINE = L */\n};\n";

printf OUT "/* total data size = $totaldata */\n";
printf OUT "\n#endif /* IBMBIDI */\n";

# Report the same total on standard output.
print "total = $totaldata\n";
|
||||
|
||||
######################################################################
|
||||
#
|
||||
# Close files
|
||||
#
|
||||
######################################################################
|
||||
# The input handle is UNICODATA; closing the misspelt UNIDATA closed nothing.
close(UNICODATA);

# Buffered write errors only surface when the output handle is closed, so
# the result is checked.
close(OUT)
    || die "cannot close output ../src/bidicattable.h file: $!";
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
#!/usr/local/bin/perl
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public
|
||||
# License Version 1.1 (the "License"); you may not use this file
|
||||
# except in compliance with the License. You may obtain a copy of
|
||||
# the License at http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS
|
||||
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
# implied. See the License for the specific language governing
|
||||
# rights and limitations under the License.
|
||||
#
|
||||
# The Original Code is IBM code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is IBM.
|
||||
# Portions created by IBM are
|
||||
# Copyright (C) International Business Machines
|
||||
# Corporation, 2000. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s): Simon Montagu
|
||||
#
|
||||
|
||||
# This program generates the header file symmtable.h from the Unicode
|
||||
# informative data file BidiMirroring.txt.
|
||||
# See the comments in that file for details of its structure and contents.
|
||||
|
||||
# Process the input file
|
||||
$ucp = "[0-9a-fA-F]{4}"; # Unicode code point (4 successive hex digits) as a pattern to match

open(UNICODATA, "<", "BidiMirroring.txt")
    || die "Cannot find BidiMirroring.txt ($!).\n" .
           "The file should be available here:\n" .
           "http://www.unicode.org/Public/UNIDATA/BidiMirroring.txt\n";

# Fill two arrays from the data file:
#   @table   - indexed by code point: the code point XOR its mirror, or
#              0xff for a mirrored character that has no mate
#   @isblock - indexed by the high byte of the code point: 1 when the block
#              contains mapped characters only, 2 when it contains at least
#              one character without a mate
while (<UNICODATA>) {
    # chomp removes only a trailing newline; chop would also eat a real
    # character from a final line that lacks a line terminator.
    chomp;

    if (/^($ucp); ($ucp) # (.+)/) {     # If the line looks like this pattern
                                        # (example: 0028; 0029 # LEFT PARENTHESIS)
        my $code = hex($1);
        $table[$code] = $code ^ hex($2);    # Enter the character XOR its symmetric pair in the table

        # Remember this block, but never downgrade a block that is already
        # known to contain characters without mates, whatever the order of
        # the lines in the file.
        my $block = $code >> 8;
        $isblock[$block] = 1 unless $isblock[$block];
    }
    elsif (/^# ($ucp); (.+)/) {         # If the line looks like this pattern
                                        # (example: # 2201; COMPLEMENT)
        my $code = hex($1);
        $table[$code] = 0xff;           # Enter 0xff in the table
        $isblock[$code >> 8] = 2;       # Remember this block
    }
}
close(UNICODATA);
|
||||
|
||||
# Generate license and header
|
||||
# Open the generated header.  The three-argument form of open keeps the
# mode separate from the file name, and $! reports why the open failed.
open(OUT, ">", "../src/symmtable.h")
    || die "cannot open output ../src/symmtable.h file: $!";

# Licence block and "generated file" banner written at the top of the header.
$npl = <<"END_OF_NPL";
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Mozilla Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is IBM code.
*
* The Initial Developer of the Original Code is IBM.
* Portions created by IBM are
* Copyright (C) International Business Machines
* Corporation, 2000. All Rights Reserved.
*/
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/unicharutil/tools/gensymmtable.pl
*/
END_OF_NPL
print OUT $npl;

# GWM is the table marker for "glyph without mate": 0xff when the C code is
# built with HANDLE_GLYPHS_WITHOUT_MATES, otherwise 0.
print OUT "#ifdef IBMBIDI\n\n",
          "#ifdef HANDLE_GLYPHS_WITHOUT_MATES\n",
          "#define GWM 0xff\n",
          "#else\n",
          "#define GWM 0\n",
          "#endif\n";
|
||||
|
||||
# Generate data tables
|
||||
# Emit one 256-entry table per block (high byte of the code point) that was
# seen in the data file.  Each table is laid out as 16 rows of 16 entries,
# with a comment header naming the columns and a comment naming each row.
# Scalar elements are read as $array[...] rather than through one-element
# array slices.
foreach my $block (0 .. 0xff) {
    next unless $isblock[$block];

    printf OUT "\n/* Block U%02X__ */\n", $block;
    printf OUT "const static PRUint8 symmtable_%02X[256] = {\n", $block;

    # Column header: the low nibble of the code point.
    print OUT "/* ";
    foreach my $byte (0 .. 0xf) {
        printf OUT " _%X ", $byte;
    }
    print OUT "*/\n";

    foreach my $row (0 .. 0xf) {
        printf OUT "/* %X_ */ ", $row;
        foreach my $byte (0 .. 0xf) {
            my $ix = ($block << 8) | ($row << 4) | ($byte);

            # Code points that never appeared in the data file have no
            # entry and are written as 0.
            my $mask = $table[$ix] || 0;
            if (0xff == $mask) {
                print OUT " GWM, ";
            }
            else {
                printf OUT "%#4x, ", $mask;
            }
        }
        print OUT "\n";
    }
    print OUT "};\n";
}
|
||||
|
||||
# Generate conversion method
|
||||
# Emit the Mirrored() conversion function: a switch on the high byte of the
# character with one case per block recorded in @isblock.  Scalar elements
# are read as $isblock[...] rather than through one-element array slices.
print OUT "\nstatic PRUnichar Mirrored(PRUnichar u)\n{\n";
print OUT "#ifdef HANDLE_GLYPHS_WITHOUT_MATES\n";
print OUT " PRUint8 mask;\n";
print OUT "#endif\n\n";
print OUT " switch (u & 0xFF00) {\n";

foreach my $block (0 .. 0xff) {
    my $kind = $isblock[$block] || 0;

    if ($kind == 1) {
        # Every mirrored character in this block has a mate: plain XOR.
        printf OUT "\n case %#x:\n", $block * 256;
        printf OUT " u ^= symmtable_%02X[u & 0xff];\n", $block;
        print OUT " break;\n";
    }
    elsif ($kind == 2) {
        # The block contains characters without mates: emit a variant that
        # checks for the GWM marker, plus the plain XOR as the fallback.
        print OUT "#ifdef HANDLE_GLYPHS_WITHOUT_MATES // placeholder for code to do something in these cases\n";
        printf OUT " case %#x:\n", $block * 256;
        printf OUT " mask = symmtable_%02X[u & 0xff];\n", $block;
        print OUT " if (GWM == mask)\n";
        print OUT " ; // Do something\n";
        print OUT " else\n";
        print OUT " u ^= mask;\n";
        print OUT " break;\n";
        print OUT "#else\n";
        printf OUT " case %#x:\n", $block * 256;
        printf OUT " u ^= symmtable_%02X[u & 0xff];\n", $block;
        print OUT " break;\n";
        print OUT "#endif\n";
    }
}

print OUT " }\n return u;\n}\n";
print OUT "#endif // IBMBIDI\n";

# Buffered write errors only surface when the handle is closed, so the
# result is checked.
close(OUT)
    || die "cannot close output ../src/symmtable.h file: $!";
|
Загрузка…
Ссылка в новой задаче