зеркало из https://github.com/mozilla/gecko-dev.git
check in bidi tools and data files for smontagu@il.ibm.com
r=ftang@netscape.com sr=erik@netscape.com b=62777 there are no direct connection to the build system so it won't impact and build status on tinderbox
This commit is contained in:
Родитель
e3864a18c9
Коммит
2e64c3a69c
|
@ -0,0 +1,238 @@
|
||||||
|
# BidiMirroring-1.txt
|
||||||
|
#
|
||||||
|
# This file is an informative supplement to the UnicodeData file. It
|
||||||
|
# lists characters that have the mirrored property
|
||||||
|
# where there is another Unicode character that typically has a glyph
|
||||||
|
# that is the mirror image of the original character's glyph.
|
||||||
|
# The repertoire covered by the file is Unicode 3.0.1.
|
||||||
|
#
|
||||||
|
# The file contains a list of lines with mappings from one code point
|
||||||
|
# to another one for character-based mirroring.
|
||||||
|
# Note that for "real" mirroring, a rendering engine needs to select
|
||||||
|
# appropriate alternative glyphs, and that many Unicode characters do not
|
||||||
|
# have a mirror-image Unicode character.
|
||||||
|
#
|
||||||
|
# Each mapping line contains two fields, separated by a semicolon (';').
|
||||||
|
# Each of the two fields contains a code point represented as a
|
||||||
|
# variable-length hexadecimal value with 4 to 6 digits.
|
||||||
|
# A comment indicates where the characters are "BEST FIT" mirroring.
|
||||||
|
#
|
||||||
|
# Code points with the "mirrored" property but no appropriate mirrors are
|
||||||
|
# listed as comments at the end of the file.
|
||||||
|
#
|
||||||
|
# For information on bidi mirroring, see UTR #9: Bidirectional Algorithm,
|
||||||
|
# at http://www.unicode.org/unicode/reports/tr9/
|
||||||
|
#
|
||||||
|
# Please address any comments to <errata@unicode.org>.
|
||||||
|
# Note that this is an archival address: messages will be checked,
|
||||||
|
# but do not expect an immediate response.
|
||||||
|
#
|
||||||
|
# This file was originally created by Markus Scherer
|
||||||
|
#
|
||||||
|
# ############################################################
|
||||||
|
|
||||||
|
0028; 0029 # LEFT PARENTHESIS
|
||||||
|
0029; 0028 # RIGHT PARENTHESIS
|
||||||
|
003C; 003E # LESS-THAN SIGN
|
||||||
|
003E; 003C # GREATER-THAN SIGN
|
||||||
|
005B; 005D # LEFT SQUARE BRACKET
|
||||||
|
005D; 005B # RIGHT SQUARE BRACKET
|
||||||
|
007B; 007D # LEFT CURLY BRACKET
|
||||||
|
007D; 007B # RIGHT CURLY BRACKET
|
||||||
|
00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||||
|
00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||||
|
2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||||
|
203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||||
|
2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
|
||||||
|
2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL
|
||||||
|
207D; 207E # SUPERSCRIPT LEFT PARENTHESIS
|
||||||
|
207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS
|
||||||
|
208D; 208E # SUBSCRIPT LEFT PARENTHESIS
|
||||||
|
208E; 208D # SUBSCRIPT RIGHT PARENTHESIS
|
||||||
|
2208; 220B # ELEMENT OF
|
||||||
|
2209; 220C # NOT AN ELEMENT OF
|
||||||
|
220A; 220D # SMALL ELEMENT OF
|
||||||
|
220B; 2208 # CONTAINS AS MEMBER
|
||||||
|
220C; 2209 # DOES NOT CONTAIN AS MEMBER
|
||||||
|
220D; 220A # SMALL CONTAINS AS MEMBER
|
||||||
|
223C; 223D # TILDE OPERATOR
|
||||||
|
223D; 223C # REVERSED TILDE
|
||||||
|
2243; 22CD # ASYMPTOTICALLY EQUAL TO
|
||||||
|
2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
|
||||||
|
2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO
|
||||||
|
2254; 2255 # COLON EQUALS
|
||||||
|
2255; 2254 # EQUALS COLON
|
||||||
|
2264; 2265 # LESS-THAN OR EQUAL TO
|
||||||
|
2265; 2264 # GREATER-THAN OR EQUAL TO
|
||||||
|
2266; 2267 # LESS-THAN OVER EQUAL TO
|
||||||
|
2267; 2266 # GREATER-THAN OVER EQUAL TO
|
||||||
|
2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
|
||||||
|
2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
|
||||||
|
226A; 226B # MUCH LESS-THAN
|
||||||
|
226B; 226A # MUCH GREATER-THAN
|
||||||
|
226E; 226F # [BEST FIT] NOT LESS-THAN
|
||||||
|
226F; 226E # [BEST FIT] NOT GREATER-THAN
|
||||||
|
2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
|
||||||
|
2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
|
||||||
|
2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO
|
||||||
|
2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
|
||||||
|
2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
|
||||||
|
2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
|
||||||
|
2276; 2277 # LESS-THAN OR GREATER-THAN
|
||||||
|
2277; 2276 # GREATER-THAN OR LESS-THAN
|
||||||
|
2278; 2279 # NEITHER LESS-THAN NOR GREATER-THAN
|
||||||
|
2279; 2278 # NEITHER GREATER-THAN NOR LESS-THAN
|
||||||
|
227A; 227B # PRECEDES
|
||||||
|
227B; 227A # SUCCEEDS
|
||||||
|
227C; 227D # PRECEDES OR EQUAL TO
|
||||||
|
227D; 227C # SUCCEEDS OR EQUAL TO
|
||||||
|
227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO
|
||||||
|
227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO
|
||||||
|
2280; 2281 # [BEST FIT] DOES NOT PRECEDE
|
||||||
|
2281; 2280 # [BEST FIT] DOES NOT SUCCEED
|
||||||
|
2282; 2283 # SUBSET OF
|
||||||
|
2283; 2282 # SUPERSET OF
|
||||||
|
2284; 2285 # [BEST FIT] NOT A SUBSET OF
|
||||||
|
2285; 2284 # [BEST FIT] NOT A SUPERSET OF
|
||||||
|
2286; 2287 # SUBSET OF OR EQUAL TO
|
||||||
|
2287; 2286 # SUPERSET OF OR EQUAL TO
|
||||||
|
2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO
|
||||||
|
2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO
|
||||||
|
228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO
|
||||||
|
228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO
|
||||||
|
228F; 2290 # SQUARE IMAGE OF
|
||||||
|
2290; 228F # SQUARE ORIGINAL OF
|
||||||
|
2291; 2292 # SQUARE IMAGE OF OR EQUAL TO
|
||||||
|
2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO
|
||||||
|
22A2; 22A3 # RIGHT TACK
|
||||||
|
22A3; 22A2 # LEFT TACK
|
||||||
|
22B0; 22B1 # PRECEDES UNDER RELATION
|
||||||
|
22B1; 22B0 # SUCCEEDS UNDER RELATION
|
||||||
|
22B2; 22B3 # NORMAL SUBGROUP OF
|
||||||
|
22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP
|
||||||
|
22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO
|
||||||
|
22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
|
||||||
|
22B6; 22B7 # ORIGINAL OF
|
||||||
|
22B7; 22B6 # IMAGE OF
|
||||||
|
22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
|
||||||
|
22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
|
||||||
|
22CB; 22CC # LEFT SEMIDIRECT PRODUCT
|
||||||
|
22CC; 22CB # RIGHT SEMIDIRECT PRODUCT
|
||||||
|
22CD; 2243 # REVERSED TILDE EQUALS
|
||||||
|
22D0; 22D1 # DOUBLE SUBSET
|
||||||
|
22D1; 22D0 # DOUBLE SUPERSET
|
||||||
|
22D6; 22D7 # LESS-THAN WITH DOT
|
||||||
|
22D7; 22D6 # GREATER-THAN WITH DOT
|
||||||
|
22D8; 22D9 # VERY MUCH LESS-THAN
|
||||||
|
22D9; 22D8 # VERY MUCH GREATER-THAN
|
||||||
|
22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN
|
||||||
|
22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN
|
||||||
|
22DC; 22DD # EQUAL TO OR LESS-THAN
|
||||||
|
22DD; 22DC # EQUAL TO OR GREATER-THAN
|
||||||
|
22DE; 22DF # EQUAL TO OR PRECEDES
|
||||||
|
22DF; 22DE # EQUAL TO OR SUCCEEDS
|
||||||
|
22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL
|
||||||
|
22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL
|
||||||
|
22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO
|
||||||
|
22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO
|
||||||
|
22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO
|
||||||
|
22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO
|
||||||
|
22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO
|
||||||
|
22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO
|
||||||
|
22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO
|
||||||
|
22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO
|
||||||
|
22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF
|
||||||
|
22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP
|
||||||
|
22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO
|
||||||
|
22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
|
||||||
|
22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS
|
||||||
|
22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS
|
||||||
|
2308; 2309 # LEFT CEILING
|
||||||
|
2309; 2308 # RIGHT CEILING
|
||||||
|
230A; 230B # LEFT FLOOR
|
||||||
|
230B; 230A # RIGHT FLOOR
|
||||||
|
2329; 232A # LEFT-POINTING ANGLE BRACKET
|
||||||
|
232A; 2329 # RIGHT-POINTING ANGLE BRACKET
|
||||||
|
3008; 3009 # LEFT ANGLE BRACKET
|
||||||
|
3009; 3008 # RIGHT ANGLE BRACKET
|
||||||
|
300A; 300B # LEFT DOUBLE ANGLE BRACKET
|
||||||
|
300B; 300A # RIGHT DOUBLE ANGLE BRACKET
|
||||||
|
300C; 300D # [BEST FIT] LEFT CORNER BRACKET
|
||||||
|
300D; 300C # [BEST FIT] RIGHT CORNER BRACKET
|
||||||
|
300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET
|
||||||
|
300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET
|
||||||
|
3010; 3011 # LEFT BLACK LENTICULAR BRACKET
|
||||||
|
3011; 3010 # RIGHT BLACK LENTICULAR BRACKET
|
||||||
|
3014; 3015 # [BEST FIT] LEFT TORTOISE SHELL BRACKET
|
||||||
|
3015; 3014 # [BEST FIT] RIGHT TORTOISE SHELL BRACKET
|
||||||
|
3016; 3017 # LEFT WHITE LENTICULAR BRACKET
|
||||||
|
3017; 3016 # RIGHT WHITE LENTICULAR BRACKET
|
||||||
|
3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET
|
||||||
|
3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET
|
||||||
|
301A; 301B # LEFT WHITE SQUARE BRACKET
|
||||||
|
301B; 301A # RIGHT WHITE SQUARE BRACKET
|
||||||
|
|
||||||
|
# The following characters have no appropriate mirroring character
|
||||||
|
|
||||||
|
# 2201; COMPLEMENT
|
||||||
|
# 2202; PARTIAL DIFFERENTIAL
|
||||||
|
# 2203; THERE EXISTS
|
||||||
|
# 2204; THERE DOES NOT EXIST
|
||||||
|
# 2211; N-ARY SUMMATION
|
||||||
|
# 2215; DIVISION SLASH
|
||||||
|
# 2216; SET MINUS
|
||||||
|
# 221A; SQUARE ROOT
|
||||||
|
# 221B; CUBE ROOT
|
||||||
|
# 221C; FOURTH ROOT
|
||||||
|
# 221D; PROPORTIONAL TO
|
||||||
|
# 221F; RIGHT ANGLE
|
||||||
|
# 2220; ANGLE
|
||||||
|
# 2221; MEASURED ANGLE
|
||||||
|
# 2222; SPHERICAL ANGLE
|
||||||
|
# 2224; DOES NOT DIVIDE
|
||||||
|
# 2226; NOT PARALLEL TO
|
||||||
|
# 222B; INTEGRAL
|
||||||
|
# 222C; DOUBLE INTEGRAL
|
||||||
|
# 222D; TRIPLE INTEGRAL
|
||||||
|
# 222E; CONTOUR INTEGRAL
|
||||||
|
# 222F; SURFACE INTEGRAL
|
||||||
|
# 2230; VOLUME INTEGRAL
|
||||||
|
# 2231; CLOCKWISE INTEGRAL
|
||||||
|
# 2232; CLOCKWISE CONTOUR INTEGRAL
|
||||||
|
# 2233; ANTICLOCKWISE CONTOUR INTEGRAL
|
||||||
|
# 2239; EXCESS
|
||||||
|
# 223B; HOMOTHETIC
|
||||||
|
# 223E; INVERTED LAZY S
|
||||||
|
# 223F; SINE WAVE
|
||||||
|
# 2240; WREATH PRODUCT
|
||||||
|
# 2241; NOT TILDE
|
||||||
|
# 2242; MINUS TILDE
|
||||||
|
# 2244; NOT ASYMPTOTICALLY EQUAL TO
|
||||||
|
# 2245; APPROXIMATELY EQUAL TO
|
||||||
|
# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO
|
||||||
|
# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
|
||||||
|
# 2248; ALMOST EQUAL TO
|
||||||
|
# 2249; NOT ALMOST EQUAL TO
|
||||||
|
# 224A; ALMOST EQUAL OR EQUAL TO
|
||||||
|
# 224B; TRIPLE TILDE
|
||||||
|
# 224C; ALL EQUAL TO
|
||||||
|
# 225F; QUESTIONED EQUAL TO
|
||||||
|
# 2260; NOT EQUAL TO
|
||||||
|
# 2262; NOT IDENTICAL TO
|
||||||
|
# 228C; MULTISET
|
||||||
|
# 2298; CIRCLED DIVISION SLASH
|
||||||
|
# 22A6; ASSERTION
|
||||||
|
# 22A7; MODELS
|
||||||
|
# 22A8; TRUE
|
||||||
|
# 22A9; FORCES
|
||||||
|
# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE
|
||||||
|
# 22AB; DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
|
||||||
|
# 22AC; DOES NOT PROVE
|
||||||
|
# 22AD; NOT TRUE
|
||||||
|
# 22AE; DOES NOT FORCE
|
||||||
|
# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
|
||||||
|
# 22B8; MULTIMAP
|
||||||
|
# 22BE; RIGHT ANGLE WITH ARC
|
||||||
|
# 22BF; RIGHT TRIANGLE
|
||||||
|
# 2320; TOP HALF INTEGRAL
|
||||||
|
# 2321; BOTTOM HALF INTEGRAL
|
|
@ -0,0 +1,345 @@
|
||||||
|
#!/usr/local/bin/perl
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the Mozilla Public
|
||||||
|
# License Version 1.1 (the "License"); you may not use this file
|
||||||
|
# except in compliance with the License. You may obtain a copy of
|
||||||
|
# the License at http://www.mozilla.org/MPL/
|
||||||
|
#
|
||||||
|
# Software distributed under the License is distributed on an "AS
|
||||||
|
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||||
|
# implied. See the License for the specific language governing
|
||||||
|
# rights and limitations under the License.
|
||||||
|
#
|
||||||
|
# The Original Code is mozilla.org code.
|
||||||
|
#
|
||||||
|
# The Initial Developer of the Original Code is IBM
|
||||||
|
# Corporation. Portions created by IBM are
|
||||||
|
# Copyright (C) 2000 IBM Corporation. All
|
||||||
|
# Rights Reserved.
|
||||||
|
#
|
||||||
|
# Contributor(s):
|
||||||
|
#
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
#
|
||||||
|
# Initial global variable
|
||||||
|
#
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
%gcount = ();
|
||||||
|
%pat = ();
|
||||||
|
|
||||||
|
%map = (
|
||||||
|
"L" => "1", # Left-to-Right
|
||||||
|
"R" => "2", # Right-to-Left
|
||||||
|
"AL" => "3", # Right-to-Left Arabic
|
||||||
|
"AN" => "4", # Arabic Number
|
||||||
|
"EN" => "5", # European Number
|
||||||
|
"ES" => "6", # European Number Separator
|
||||||
|
"ET" => "7", # European Number Terminator
|
||||||
|
"CS" => "8", # Common Number Separator
|
||||||
|
"ON" => "9", # Other Neutrals
|
||||||
|
"NSM" => "10", # Non-Spacing Mark
|
||||||
|
"BN" => "11", # Boundary Neutral
|
||||||
|
"B" => "12", # Paragraph Separator
|
||||||
|
"S" => "13", # Segment Separator
|
||||||
|
"WS" => "14", # Whitespace
|
||||||
|
"LRE" => "15", # Left-to-Right Embedding
|
||||||
|
"RLE" => "15", # Right-to-Left Embedding
|
||||||
|
"PDF" => "15", # Pop Directional Format
|
||||||
|
"LRO" => "15", # Left-to-Right Override
|
||||||
|
"RLO" => "15" # Right-to-Left Override
|
||||||
|
);
|
||||||
|
|
||||||
|
%special = ();
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
#
|
||||||
|
# Open the unicode database file
|
||||||
|
#
|
||||||
|
######################################################################
|
||||||
|
open ( UNICODATA , "< UnicodeData-Latest.txt")
|
||||||
|
|| die "cannot find UnicodeData-Latest.txt";
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
#
|
||||||
|
# Open the output file
|
||||||
|
#
|
||||||
|
######################################################################
|
||||||
|
open ( OUT , "> ../src/bidicattable.h")
|
||||||
|
|| die "cannot open output ../src/bidicattable.h file";
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
#
|
||||||
|
# Generate license and header
|
||||||
|
#
|
||||||
|
######################################################################
|
||||||
|
$npl = <<END_OF_NPL;
|
||||||
|
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License
|
||||||
|
* Version 1.1 (the "MPL"); you may not use this file except in
|
||||||
|
* compliance with the MPL. You may obtain a copy of the MPL at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the MPL is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* MPL.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is IBM
|
||||||
|
* Corporation. Portions created by IBM are
|
||||||
|
* Copyright (C) 2000 IBM Corporation. All
|
||||||
|
* Rights Reserved.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
|
||||||
|
mozilla/intl/unicharutil/tools/genbidicattable.pl
|
||||||
|
*/
|
||||||
|
END_OF_NPL
|
||||||
|
print OUT $npl;
|
||||||
|
print OUT "\n#ifdef IBMBIDI\n\n";
|
||||||
|
print OUT "#include \"nscore.h\" \n\n";
|
||||||
|
|
||||||
|
|
||||||
|
%bidicategory = ();
|
||||||
|
%sh = ();
|
||||||
|
%sl = ();
|
||||||
|
%sc = ();
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
#
|
||||||
|
# Process the file line by line
|
||||||
|
#
|
||||||
|
######################################################################
|
||||||
|
while(<UNICODATA>) {
|
||||||
|
# Strip the line terminator only; chop would remove the last character
# even when the final line of the file has no trailing newline.
chomp;
|
||||||
|
######################################################################
|
||||||
|
#
|
||||||
|
# Get value from fields
|
||||||
|
#
|
||||||
|
######################################################################
|
||||||
|
@f = split(/;/ , $_);
|
||||||
|
$c = $f[0]; # The unicode value
|
||||||
|
$n = $f[1]; # The unicode name
|
||||||
|
$g = $f[2]; # The General Category
|
||||||
|
$b = $f[4]; # The Bidi Category
|
||||||
|
|
||||||
|
if(( substr($n, 0, 1) ne "<") || ($n eq "<control>"))
|
||||||
|
{
|
||||||
|
#
|
||||||
|
# print $g;
|
||||||
|
#
|
||||||
|
|
||||||
|
$gcount{$b}++;
|
||||||
|
$bidicategory{$c} = $b;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
# Handle special block
|
||||||
|
@pair=split(/, /, $n );
|
||||||
|
$catnum = $map{$b};
|
||||||
|
|
||||||
|
# printf "[%s][%s] => %d\n", $pair[0], $pair[1], $catnum;
|
||||||
|
if( $pair[1] eq "First>") {
|
||||||
|
$sl{$pair[0]} = $c;
|
||||||
|
$sc{$pair[0]} = $catnum;
|
||||||
|
} elsif ( $pair[1] eq "Last>") {
|
||||||
|
$sh{$pair[0]} = $c;
|
||||||
|
if($sc{$pair[0]} ne $catnum)
|
||||||
|
{
|
||||||
|
print "WARNING !!!! error in handling special block\n\n";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print "WARNING !!!! error in handling special block\n\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# XXX - How can this be made more flexible as new blocks are added to the UCDB?
|
||||||
|
|
||||||
|
@range = (
|
||||||
|
0x0000, 0x07ff,
|
||||||
|
0x0900, 0x18ff,
|
||||||
|
0x1e00, 0x28ff,
|
||||||
|
0x2e80, 0x33ff,
|
||||||
|
0xa000, 0xa4ff,
|
||||||
|
0xf900, 0xffff
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
$totaldata = 0;
|
||||||
|
|
||||||
|
$tt=($#range+1) / 2;
|
||||||
|
@patarray = ();
|
||||||
|
|
||||||
|
|
||||||
|
# This should improve performance: put all the patterns like 0x11111111, 0x22222222 etc at the beginning of the table.
|
||||||
|
# Since there are a lot of blocks with the same category, we should be able to save a lot of time extracting the digits
|
||||||
|
for (0..15) {
|
||||||
|
$pattern = "0x".(sprintf("%X", $_) x 8);
|
||||||
|
$patarray[$_] = $pattern;
|
||||||
|
$pat{$pattern} = $_;
|
||||||
|
}
|
||||||
|
|
||||||
|
$newidx = 0x10;
|
||||||
|
|
||||||
|
for($t = 1; $t <= $tt; $t++)
|
||||||
|
{
|
||||||
|
$tl = $range[($t-1) * 2];
|
||||||
|
$th = $range[($t-1) * 2 + 1];
|
||||||
|
$ts = ( $th - $tl ) >> 3;
|
||||||
|
$totaldata += $ts + 1;
|
||||||
|
printf OUT "static PRUint8 gBidiCatIdx%d[%d] = {\n", $t, $ts + 1;
|
||||||
|
for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ )
|
||||||
|
{
|
||||||
|
$data = 0;
|
||||||
|
|
||||||
|
for($j = 0; $j < 8 ; $j++)
|
||||||
|
{
|
||||||
|
#defaults for unassigned characters -- see table 3.7 in the Unicode Bidi Algorithm
|
||||||
|
$test = ($i << 3) + $j;
|
||||||
|
if ((($test >= 0x0590) && ($test <= 0x5FF))
|
||||||
|
|| (($test >= 0xFB1D) && ($test <= 0xFB4F)))
|
||||||
|
{
|
||||||
|
$default = $map{"R"};
|
||||||
|
} elsif ((($test >= 0x0600) && ($test <= 0x7BF))
|
||||||
|
|| (($test >= 0xFB50) && ($test <= 0xFDFF))
|
||||||
|
|| (($test >= 0xFE70) && ($test <= 0xFEFF)))
|
||||||
|
{
|
||||||
|
$default = $map{"AL"};
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
$default = $map{"L"};
|
||||||
|
}
|
||||||
|
$k = sprintf("%04X", (($i << 3) + $j));
|
||||||
|
|
||||||
|
$cat = $bidicategory{$k};
|
||||||
|
if( $cat eq "")
|
||||||
|
{
|
||||||
|
$data = $data + ($default << (4*$j));
|
||||||
|
} else {
|
||||||
|
$data = $data + ($map{$cat} << (4*$j));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
$pattern = sprintf("0x%08X", $data);
|
||||||
|
|
||||||
|
$idx = $pat{$pattern};
|
||||||
|
unless( exists($pat{$pattern})){
|
||||||
|
$idx = $newidx++;
|
||||||
|
$patarray[$idx] = $pattern;
|
||||||
|
$pat{$pattern} = $idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf OUT " %3d, /* U+%04X - U+%04X : %s */\n" ,
|
||||||
|
$idx, ($i << 3),((($i +1)<< 3)-1), $pattern ;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
printf OUT "};\n\n";
|
||||||
|
|
||||||
|
if($t ne $tt)
|
||||||
|
{
|
||||||
|
$tl = $range[($t-1) * 2 + 1] + 1;
|
||||||
|
$th = $range[$t * 2] - 1;
|
||||||
|
for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ )
|
||||||
|
{
|
||||||
|
$data = 0;
|
||||||
|
for($j = 0; $j < 8 ; $j++)
|
||||||
|
{
|
||||||
|
$k = sprintf("%04X", (($i << 3) + $j));
|
||||||
|
|
||||||
|
$cat = $bidicategory{$k};
|
||||||
|
if( $cat ne "")
|
||||||
|
{
|
||||||
|
$data = $data + ($map{$cat} << (4*$j));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$pattern = sprintf("0x%08X", $data);
|
||||||
|
if($data ne 0)
|
||||||
|
{
|
||||||
|
print "WARNING, Unicode Database now contain characters" .
|
||||||
|
"which we have not consider, change this program !!!\n\n";
|
||||||
|
printf "Problem- U+%04X - U+%04X range\n", ($i << 3),((($i +1)<< 3)-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if($newidx > 255)
|
||||||
|
{
|
||||||
|
die "We have more than 255 patterns !!! - $newidx\n\n" .
|
||||||
|
"This program is now broken!!!\n\n\n";
|
||||||
|
|
||||||
|
}
|
||||||
|
printf OUT "static PRUint32 gBidiCatPat[$newidx] = {\n";
|
||||||
|
for($i = 0 ; $i < $newidx; $i++)
|
||||||
|
{
|
||||||
|
printf OUT " %s, /* $i */\n", $patarray[$i] ;
|
||||||
|
}
|
||||||
|
printf OUT "};\n\n";
|
||||||
|
$totaldata += $newidx * 4;
|
||||||
|
|
||||||
|
printf OUT "static eBidiCategory GetBidiCat(PRUnichar u)\n{\n";
|
||||||
|
printf OUT " PRUint32 pat;\n";
|
||||||
|
printf OUT " PRUint16 patidx;\n\n";
|
||||||
|
printf OUT " /* Handle blocks which use index table mapping */ \n\n";
|
||||||
|
for($t = 1; $t <= $tt; $t++)
|
||||||
|
{
|
||||||
|
$tl = $range[($t-1) * 2];
|
||||||
|
$th = $range[($t-1) * 2 + 1];
|
||||||
|
if ($tl == 0) {
|
||||||
|
printf OUT " /* Handle U+%04X to U+%04X */\n", $tl, $th;
|
||||||
|
printf OUT " if (u<=((PRUnichar)0x%04X)) {\n", $th;
|
||||||
|
printf OUT " patidx = gBidiCatIdx%d [( u >> 3 )];\n", $t;
|
||||||
|
} elsif ($th == 0xFFFF) {
|
||||||
|
printf OUT " /* Handle U+%04X to U+%04X */\n", $tl, $th;
|
||||||
|
printf OUT " if (((PRUnichar)0x%04X)<=u) {\n", $tl;
|
||||||
|
printf OUT " patidx = gBidiCatIdx%d [( (u -(PRUnichar) 0x%04X) >> 3 )];\n", $t, $tl;
|
||||||
|
} else {
|
||||||
|
printf OUT " /* Handle U+%04X to U+%04X */\n", $tl, $th;
|
||||||
|
printf OUT " if ((((PRUnichar)0x%04X)<=u)&&(u<=((PRUnichar)0x%04X))) {\n", $tl, $th;
|
||||||
|
printf OUT " patidx = gBidiCatIdx%d [( (u -(PRUnichar) 0x%04X) >> 3 )];\n", $t, $tl;
|
||||||
|
}
|
||||||
|
printf OUT " if (patidx < 0x10)\n";
|
||||||
|
printf OUT " return (eBidiCategory)patidx;\n";
|
||||||
|
printf OUT " else {\n";
|
||||||
|
printf OUT " pat = gBidiCatPat[patidx];\n";
|
||||||
|
# '%%' emits a literal '%' in the generated C; a bare '%' here is parsed by
# printf as the start of a (invalid) conversion specification.
printf OUT " return (eBidiCategory)((pat >> ((u %% 8) * 4)) & 0x0F);\n";
|
||||||
|
printf OUT " }\n";
|
||||||
|
printf OUT " }\n\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
@special = keys(%sh);
|
||||||
|
$sp = 0;
|
||||||
|
foreach $s ( sort(@special) ) {
|
||||||
|
# don't bother to define the special blocks unless they have a different
|
||||||
|
# value from the default they would be given if they were undefined
|
||||||
|
unless ($sc{$s} == $map{"L"}) {
|
||||||
|
unless ($sp++) {
|
||||||
|
%by_value = reverse %map;
|
||||||
|
printf OUT " /* Handle blocks which share the same category */\n\n";
|
||||||
|
}
|
||||||
|
printf OUT " /* Handle %s block */\n", substr($s, 1);
|
||||||
|
printf OUT " if((((PRUnichar)0x%s)<=u)&&(u<=((PRUnichar)0x%s))) \n", $sl{$s}, $sh{$s};
|
||||||
|
printf OUT " return eBidiCat_$by_value{$sc{$s}}; \n\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Emit the fallback return and close the generated function. No ';' after
# the closing brace: it would be a stray empty declaration in the output.
printf OUT " return eBidiCat_L; /* UNDEFINE = L */\n}\n";
|
||||||
|
|
||||||
|
printf OUT "/* total data size = $totaldata */\n";
|
||||||
|
printf OUT "\n#endif /* IBMBIDI */\n";
|
||||||
|
print "total = $totaldata\n";
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
#
|
||||||
|
# Close files
|
||||||
|
#
|
||||||
|
######################################################################
|
||||||
|
# Close the input handle; it was opened under the name UNICODATA.
close(UNICODATA);
|
||||||
|
close(OUT);
|
||||||
|
|
|
@ -0,0 +1,138 @@
|
||||||
|
#!/usr/local/bin/perl
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the Mozilla Public
|
||||||
|
# License Version 1.1 (the "License"); you may not use this file
|
||||||
|
# except in compliance with the License. You may obtain a copy of
|
||||||
|
# the License at http://www.mozilla.org/MPL/
|
||||||
|
#
|
||||||
|
# Software distributed under the License is distributed on an "AS
|
||||||
|
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||||
|
# implied. See the License for the specific language governing
|
||||||
|
# rights and limitations under the License.
|
||||||
|
#
|
||||||
|
# The Original Code is IBM code.
|
||||||
|
#
|
||||||
|
# The Initial Developer of the Original Code is IBM.
|
||||||
|
# Portions created by IBM are
|
||||||
|
# Copyright (C) International Business Machines
|
||||||
|
# Corporation, 2000. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Contributor(s): Simon Montagu
|
||||||
|
#
|
||||||
|
|
||||||
|
# This program generates the header file symmtable.h from the Unicode
|
||||||
|
# informative data file BidiMirroring.txt.
|
||||||
|
# See the comments in that file for details of its structure and contents.
|
||||||
|
|
||||||
|
# Process the input file
|
||||||
|
$ucp = "[0-9a-fA-F]{4}"; # Unicode code point (4 successive hex digits) as a pattern to match
|
||||||
|
open ( UNICODATA , "< BidiMirroring.txt")
|
||||||
|
|| die "Cannot find BidiMirroring.txt.\
|
||||||
|
The file should be avaiable here:\
|
||||||
|
http://www.unicode.org/Public/UNIDATA/BidiMirroring.txt\n";
|
||||||
|
|
||||||
|
while (<UNICODATA>) {
|
||||||
|
# Strip the line terminator only; chop would remove the last character
# even when the final line of the file has no trailing newline.
chomp;
|
||||||
|
if (/^($ucp); ($ucp) # (.+)/) { # If the line looks like this pattern
|
||||||
|
# (example: 0028; 0029 # LEFT PARENTHESIS)
|
||||||
|
$table[hex($1)] = hex($1) ^ hex($2); # Enter the character XOR its symmetric pair in the table
|
||||||
|
$isblock[hex(substr($1, 0, 2))] = 1; # Remember this block (first two hex digits) as holding a mirrored pair
|
||||||
|
}
|
||||||
|
elsif (/^# ($ucp); (.+)/) { # If the line looks like this pattern
|
||||||
|
# (example: # 2201; COMPLEMENT)
|
||||||
|
$table[hex($1)] = 0xff; # Enter 0xff in the table
|
||||||
|
$isblock[hex(substr($1, 0, 2))] = 2; # Remember this block (first two hex digits) as holding a character with no mirror
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(UNICODATA);
|
||||||
|
|
||||||
|
# Generate license and header
|
||||||
|
open ( OUT , "> ../src/symmtable.h")
|
||||||
|
|| die "cannot open output ../src/symmtable.h file";
|
||||||
|
$npl = <<END_OF_NPL;
|
||||||
|
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public
|
||||||
|
* License Version 1.1 (the "License"); you may not use this file
|
||||||
|
* except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS
|
||||||
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||||
|
* implied. See the License for the specific language governing
|
||||||
|
* rights and limitations under the License.
|
||||||
|
*
|
||||||
|
* The Original Code is IBM code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is IBM.
|
||||||
|
* Portions created by IBM are
|
||||||
|
* Copyright (C) International Business Machines
|
||||||
|
* Corporation, 2000. All Rights Reserved.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
|
||||||
|
mozilla/intl/unicharutil/tools/gensymmtable.pl
|
||||||
|
*/
|
||||||
|
END_OF_NPL
|
||||||
|
print OUT $npl;
|
||||||
|
|
||||||
|
print OUT "#ifdef IBMBIDI\n\n";
|
||||||
|
print OUT "#ifdef HANDLE_GLYPHS_WITHOUT_MATES\n";
|
||||||
|
print OUT "#define GWM 0xff\n";
|
||||||
|
print OUT "#else\n";
|
||||||
|
print OUT "#define GWM 0\n";
|
||||||
|
print OUT "#endif\n";
|
||||||
|
|
||||||
|
# Generate data tables
|
||||||
|
foreach $block (0 .. 0xff) {
|
||||||
|
if (@isblock[$block]) {
|
||||||
|
printf OUT "\n/* Block U%02X__ */\n", $block;
|
||||||
|
printf OUT "const static PRUint8 symmtable_%02X[256] = {\n", $block;
|
||||||
|
print OUT "/* ";
|
||||||
|
foreach $byte (0 .. 0xf) {
|
||||||
|
printf OUT " _%X ", $byte;
|
||||||
|
}
|
||||||
|
print OUT "*/\n";
|
||||||
|
foreach $row (0 .. 0xf) {
|
||||||
|
printf OUT "/* %X_ */ ", $row;
|
||||||
|
foreach $byte (0 .. 0xf) {
|
||||||
|
$ix = ($block << 8) | ($row << 4) | ($byte);
|
||||||
|
printf OUT ((0xff == @table[$ix]) ? " GWM, " : "%#4x, ", @table[$ix]);
|
||||||
|
}
|
||||||
|
print OUT "\n";
|
||||||
|
}
|
||||||
|
print OUT "};\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Generate conversion method
|
||||||
|
print OUT "\nstatic PRUnichar Mirrored(PRUnichar u)\n{\n";
|
||||||
|
print OUT "#ifdef HANDLE_GLYPHS_WITHOUT_MATES\n";
|
||||||
|
print OUT " PRUint8 mask;\n";
|
||||||
|
print OUT "#endif\n\n";
|
||||||
|
print OUT " switch (u & 0xFF00) {\n";
|
||||||
|
foreach $block (0 .. 0xff) {
|
||||||
|
if (1==@isblock[$block]) {
|
||||||
|
printf OUT "\n case %#x:\n", $block * 256;
|
||||||
|
printf OUT " u ^= symmtable_%02X[u & 0xff];\n", $block;
|
||||||
|
print OUT " break;\n";
|
||||||
|
}
|
||||||
|
elsif (2==@isblock[$block]) {
|
||||||
|
print OUT "#ifdef HANDLE_GLYPHS_WITHOUT_MATES // placeholder for code to do something in these cases\n";
|
||||||
|
printf OUT " case %#x:\n", $block * 256;
|
||||||
|
printf OUT " mask = symmtable_%02X[u & 0xff];\n", $block;
|
||||||
|
print OUT " if (GWM == mask)\n";
|
||||||
|
print OUT " ; // Do something\n";
|
||||||
|
print OUT " else\n";
|
||||||
|
print OUT " u ^= mask;\n";
|
||||||
|
print OUT " break;\n";
|
||||||
|
print OUT "#else\n";
|
||||||
|
printf OUT " case %#x:\n", $block * 256;
|
||||||
|
printf OUT " u ^= symmtable_%02X[u & 0xff];\n", $block;
|
||||||
|
print OUT " break;\n";
|
||||||
|
print OUT "#endif\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
print OUT " }\n return u;\n}\n";
|
||||||
|
printf OUT "#endif // IBMBIDI\n";
|
||||||
|
close(OUT);
|
Загрузка…
Ссылка в новой задаче