diff --git a/intl/unicharutil/tools/BidiMirroring.txt b/intl/unicharutil/tools/BidiMirroring.txt new file mode 100644 index 000000000000..1b62c128b17a --- /dev/null +++ b/intl/unicharutil/tools/BidiMirroring.txt @@ -0,0 +1,238 @@ +# BidiMirroring-1.txt +# +# This file is an informative supplement to the UnicodeData file. It +# lists characters that have the mirrored property +# where there is another Unicode character that typically has a glyph +# that is the mirror image of the original character's glyph. +# The repertoire covered by the file is Unicode 3.0.1. +# +# The file contains a list of lines with mappings from one code point +# to another one for character-based mirroring. +# Note that for "real" mirroring, a rendering engine needs to select +# appropriate alternative glyphs, and that many Unicode characters do not +# have a mirror-image Unicode character. +# +# Each mapping line contains two fields, separated by a semicolon (';'). +# Each of the two fields contains a code point represented as a +# variable-length hexadecimal value with 4 to 6 digits. +# A comment indicates where the characters are "BEST FIT" mirroring. +# +# Code points with the "mirrored" property but no appropriate mirrors are +# listed as comments at the end of the file. +# +# For information on bidi mirroring, see UTR #9: Bidirectional Algorithm, +# at http://www.unicode.org/unicode/reports/tr9/ +# +# Please address any comments to . +# Note that this is an archival address: messages will be checked, +# but do not expect an immediate response. 
+# +# This file was originally created by Markus Scherer +# +# ############################################################ + +0028; 0029 # LEFT PARENTHESIS +0029; 0028 # RIGHT PARENTHESIS +003C; 003E # LESS-THAN SIGN +003E; 003C # GREATER-THAN SIGN +005B; 005D # LEFT SQUARE BRACKET +005D; 005B # RIGHT SQUARE BRACKET +007B; 007D # LEFT CURLY BRACKET +007D; 007B # RIGHT CURLY BRACKET +00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2045; 2046 # LEFT SQUARE BRACKET WITH QUILL +2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL +207D; 207E # SUPERSCRIPT LEFT PARENTHESIS +207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS +208D; 208E # SUBSCRIPT LEFT PARENTHESIS +208E; 208D # SUBSCRIPT RIGHT PARENTHESIS +2208; 220B # ELEMENT OF +2209; 220C # NOT AN ELEMENT OF +220A; 220D # SMALL ELEMENT OF +220B; 2208 # CONTAINS AS MEMBER +220C; 2209 # DOES NOT CONTAIN AS MEMBER +220D; 220A # SMALL CONTAINS AS MEMBER +223C; 223D # TILDE OPERATOR +223D; 223C # REVERSED TILDE +2243; 22CD # ASYMPTOTICALLY EQUAL TO +2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO +2254; 2255 # COLON EQUALS +2255; 2254 # EQUALS COLON +2264; 2265 # LESS-THAN OR EQUAL TO +2265; 2264 # GREATER-THAN OR EQUAL TO +2266; 2267 # LESS-THAN OVER EQUAL TO +2267; 2266 # GREATER-THAN OVER EQUAL TO +2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO +2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO +226A; 226B # MUCH LESS-THAN +226B; 226A # MUCH GREATER-THAN +226E; 226F # [BEST FIT] NOT LESS-THAN +226F; 226E # [BEST FIT] NOT GREATER-THAN +2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO +2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO +2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO +2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO +2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR 
EQUIVALENT TO +2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO +2276; 2277 # LESS-THAN OR GREATER-THAN +2277; 2276 # GREATER-THAN OR LESS-THAN +2278; 2279 # NEITHER LESS-THAN NOR GREATER-THAN +2279; 2278 # NEITHER GREATER-THAN NOR LESS-THAN +227A; 227B # PRECEDES +227B; 227A # SUCCEEDS +227C; 227D # PRECEDES OR EQUAL TO +227D; 227C # SUCCEEDS OR EQUAL TO +227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO +227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO +2280; 2281 # [BEST FIT] DOES NOT PRECEDE +2281; 2280 # [BEST FIT] DOES NOT SUCCEED +2282; 2283 # SUBSET OF +2283; 2282 # SUPERSET OF +2284; 2285 # [BEST FIT] NOT A SUBSET OF +2285; 2284 # [BEST FIT] NOT A SUPERSET OF +2286; 2287 # SUBSET OF OR EQUAL TO +2287; 2286 # SUPERSET OF OR EQUAL TO +2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO +2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO +228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO +228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO +228F; 2290 # SQUARE IMAGE OF +2290; 228F # SQUARE ORIGINAL OF +2291; 2292 # SQUARE IMAGE OF OR EQUAL TO +2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO +22A2; 22A3 # RIGHT TACK +22A3; 22A2 # LEFT TACK +22B0; 22B1 # PRECEDES UNDER RELATION +22B1; 22B0 # SUCCEEDS UNDER RELATION +22B2; 22B3 # NORMAL SUBGROUP OF +22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP +22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO +22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +22B6; 22B7 # ORIGINAL OF +22B7; 22B6 # IMAGE OF +22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +22CB; 22CC # LEFT SEMIDIRECT PRODUCT +22CC; 22CB # RIGHT SEMIDIRECT PRODUCT +22CD; 2243 # REVERSED TILDE EQUALS +22D0; 22D1 # DOUBLE SUBSET +22D1; 22D0 # DOUBLE SUPERSET +22D6; 22D7 # LESS-THAN WITH DOT +22D7; 22D6 # GREATER-THAN WITH DOT +22D8; 22D9 # VERY MUCH LESS-THAN +22D9; 22D8 # VERY MUCH GREATER-THAN +22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN +22DB; 22DA # GREATER-THAN EQUAL TO OR 
LESS-THAN +22DC; 22DD # EQUAL TO OR LESS-THAN +22DD; 22DC # EQUAL TO OR GREATER-THAN +22DE; 22DF # EQUAL TO OR PRECEDES +22DF; 22DE # EQUAL TO OR SUCCEEDS +22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL +22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL +22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO +22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO +22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO +22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO +22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO +22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO +22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO +22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO +22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF +22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP +22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO +22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL +22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS +22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS +2308; 2309 # LEFT CEILING +2309; 2308 # RIGHT CEILING +230A; 230B # LEFT FLOOR +230B; 230A # RIGHT FLOOR +2329; 232A # LEFT-POINTING ANGLE BRACKET +232A; 2329 # RIGHT-POINTING ANGLE BRACKET +3008; 3009 # LEFT ANGLE BRACKET +3009; 3008 # RIGHT ANGLE BRACKET +300A; 300B # LEFT DOUBLE ANGLE BRACKET +300B; 300A # RIGHT DOUBLE ANGLE BRACKET +300C; 300D # [BEST FIT] LEFT CORNER BRACKET +300D; 300C # [BEST FIT] RIGHT CORNER BRACKET +300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET +300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET +3010; 3011 # LEFT BLACK LENTICULAR BRACKET +3011; 3010 # RIGHT BLACK LENTICULAR BRACKET +3014; 3015 # [BEST FIT] LEFT TORTOISE SHELL BRACKET +3015; 3014 # [BEST FIT] RIGHT TORTOISE SHELL BRACKET +3016; 3017 # LEFT WHITE LENTICULAR BRACKET +3017; 3016 # RIGHT WHITE LENTICULAR BRACKET +3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET +3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET +301A; 301B # LEFT WHITE SQUARE BRACKET 
+301B; 301A # RIGHT WHITE SQUARE BRACKET + +# The following characters have no appropriate mirroring character + +# 2201; COMPLEMENT +# 2202; PARTIAL DIFFERENTIAL +# 2203; THERE EXISTS +# 2204; THERE DOES NOT EXIST +# 2211; N-ARY SUMMATION +# 2215; DIVISION SLASH +# 2216; SET MINUS +# 221A; SQUARE ROOT +# 221B; CUBE ROOT +# 221C; FOURTH ROOT +# 221D; PROPORTIONAL TO +# 221F; RIGHT ANGLE +# 2220; ANGLE +# 2221; MEASURED ANGLE +# 2222; SPHERICAL ANGLE +# 2224; DOES NOT DIVIDE +# 2226; NOT PARALLEL TO +# 222B; INTEGRAL +# 222C; DOUBLE INTEGRAL +# 222D; TRIPLE INTEGRAL +# 222E; CONTOUR INTEGRAL +# 222F; SURFACE INTEGRAL +# 2230; VOLUME INTEGRAL +# 2231; CLOCKWISE INTEGRAL +# 2232; CLOCKWISE CONTOUR INTEGRAL +# 2233; ANTICLOCKWISE CONTOUR INTEGRAL +# 2239; EXCESS +# 223B; HOMOTHETIC +# 223E; INVERTED LAZY S +# 223F; SINE WAVE +# 2240; WREATH PRODUCT +# 2241; NOT TILDE +# 2242; MINUS TILDE +# 2244; NOT ASYMPTOTICALLY EQUAL TO +# 2245; APPROXIMATELY EQUAL TO +# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO +# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +# 2248; ALMOST EQUAL TO +# 2249; NOT ALMOST EQUAL TO +# 224A; ALMOST EQUAL OR EQUAL TO +# 224B; TRIPLE TILDE +# 224C; ALL EQUAL TO +# 225F; QUESTIONED EQUAL TO +# 2260; NOT EQUAL TO +# 2262; NOT IDENTICAL TO +# 228C; MULTISET +# 2298; CIRCLED DIVISION SLASH +# 22A6; ASSERTION +# 22A7; MODELS +# 22A8; TRUE +# 22A9; FORCES +# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE +# 22AB; DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +# 22AC; DOES NOT PROVE +# 22AD; NOT TRUE +# 22AE; DOES NOT FORCE +# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +# 22B8; MULTIMAP +# 22BE; RIGHT ANGLE WITH ARC +# 22BF; RIGHT TRIANGLE +# 2320; TOP HALF INTEGRAL +# 2321; BOTTOM HALF INTEGRAL diff --git a/intl/unicharutil/tools/genbidicattable.pl b/intl/unicharutil/tools/genbidicattable.pl new file mode 100644 index 000000000000..e814a88c37f9 --- /dev/null +++ b/intl/unicharutil/tools/genbidicattable.pl @@ -0,0 +1,345 @@ +#!/usr/local/bin/perl 
+# +# The contents of this file are subject to the Mozilla Public +# License Version 1.1 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS +# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or +# implied. See the License for the specific language governing +# rights and limitations under the License. +# +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is IBM +# Corporation. Portions created by IBM are +# Copyright (C) 2000 IBM Corporation. All +# Rights Reserved. +# +# Contributor(s): +# + +###################################################################### +# +# Initial global variable +# +###################################################################### + +%gcount = (); +%pat = (); + +%map = ( + "L" => "1", # Left-to-Right + "R" => "2", # Right-to-Left + "AL" => "3", # Right-to-Left Arabic + "AN" => "4", # Arabic Number + "EN" => "5", # European Number + "ES" => "6", # European Number Separator + "ET" => "7", # European Number Terminator + "CS" => "8", # Common Number Separator + "ON" => "9", # Other Neutrals + "NSM" => "10", # Non-Spacing Mark + "BN" => "11", # Boundary Neutral + "B" => "12", # Paragraph Separator + "S" => "13", # Segment Separator + "WS" => "14", # Whitespace + "LRE" => "15", # Left-to-Right Embedding + "RLE" => "15", # Right-to-Left Embedding + "PDF" => "15", # Pop Directional Format + "LRO" => "15", # Left-to-Right Override + "RLO" => "15" # Right-to-Left Override +); + +%special = (); + +###################################################################### +# +# Open the unicode database file +# +###################################################################### +open ( UNICODATA , "< UnicodeData-Latest.txt") + || die "cannot find UnicodeData-Latest.txt"; + 
+###################################################################### +# +# Open the output file +# +###################################################################### +open ( OUT , "> ../src/bidicattable.h") + || die "cannot open output ../src/bidicattable.h file"; + +###################################################################### +# +# Generate license and header +# +###################################################################### +$npl = <) { + chop; + ###################################################################### + # + # Get value from fields + # + ###################################################################### + @f = split(/;/ , $_); + $c = $f[0]; # The unicode value + $n = $f[1]; # The unicode name + $g = $f[2]; # The General Category + $b = $f[4]; # The Bidi Category + + if(( substr($n, 0, 1) ne "<") || ($n eq "")) + { + # + # print $g; + # + + $gcount{$b}++; + $bidicategory{$c} = $b; + } else { + + # Handle special block + @pair=split(/, /, $n ); + $catnum = $map{$b}; + + # printf "[%s][%s] => %d\n", $pair[0], $pair[1], $catnum; + if( $pair[1] eq "First>") { + $sl{$pair[0]} = $c; + $sc{$pair[0]} = $catnum; + } elsif ( $pair[1] eq "Last>") { + $sh{$pair[0]} = $c; + if($sc{$pair[0]} ne $catnum) + { + print "WARNING !!!! error in handling special block\n\n"; + } + } else { + print "WARNING !!!! error in handling special block\n\n"; + } + } +} + +# XXX - How can this be made more flexible as new blocks are added to the UCDB? + +@range = ( + 0x0000, 0x07ff, + 0x0900, 0x18ff, + 0x1e00, 0x28ff, + 0x2e80, 0x33ff, + 0xa000, 0xa4ff, + 0xf900, 0xffff +); + + +$totaldata = 0; + +$tt=($#range+1) / 2; +@patarray = (); + + +# This should improve performance: put all the patterns like 0x11111111, 0x22222222 etc at the beginning of the table. 
+# Since there are a lot of blocks with the same category, we should be able to save a lot of time extracting the digits +for (0..15) { + $pattern = "0x".(sprintf("%X", $_) x 8); + $patarray[$_] = $pattern; + $pat{$pattern} = $_; +} + +$newidx = 0x10; + +for($t = 1; $t <= $tt; $t++) +{ + $tl = $range[($t-1) * 2]; + $th = $range[($t-1) * 2 + 1]; + $ts = ( $th - $tl ) >> 3; + $totaldata += $ts + 1; + printf OUT "static PRUint8 gBidiCatIdx%d[%d] = {\n", $t, $ts + 1; + for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ ) + { + $data = 0; + + for($j = 0; $j < 8 ; $j++) + { + #defaults for unassigned characters -- see table 3.7 in the Unicode Bidi Algorithm + $test = ($i << 3) + $j; + if ((($test >= 0x0590) && ($test <= 0x5FF)) + || (($test >= 0xFB1D) && ($test <= 0xFB4F))) + { + $default = $map{"R"}; + } elsif ((($test >= 0x0600) && ($test <= 0x7BF)) + || (($test >= 0xFB50) && ($test <= 0xFDFF)) + || (($test >= 0xFE70) && ($test <= 0xFEFF))) + { + $default = $map{"AL"}; + } else + { + $default = $map{"L"}; + } + $k = sprintf("%04X", (($i << 3) + $j)); + + $cat = $bidicategory{$k}; + if( $cat eq "") + { + $data = $data + ($default << (4*$j)); + } else { + $data = $data + ($map{$cat} << (4*$j)); + } + + } + $pattern = sprintf("0x%08X", $data); + + $idx = $pat{$pattern}; + unless( exists($pat{$pattern})){ + $idx = $newidx++; + $patarray[$idx] = $pattern; + $pat{$pattern} = $idx; + } + + printf OUT " %3d, /* U+%04X - U+%04X : %s */\n" , + $idx, ($i << 3),((($i +1)<< 3)-1), $pattern ; + + + } + printf OUT "};\n\n"; + + if($t ne $tt) + { + $tl = $range[($t-1) * 2 + 1] + 1; + $th = $range[$t * 2] - 1; + for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ ) + { + $data = 0; + for($j = 0; $j < 8 ; $j++) + { + $k = sprintf("%04X", (($i << 3) + $j)); + + $cat = $bidicategory{$k}; + if( $cat ne "") + { + $data = $data + ($map{$cat} << (4*$j)); + } + } + $pattern = sprintf("0x%08X", $data); + if($data ne 0) + { + print "WARNING, Unicode Database now contain characters" . 
+ "which we have not consider, change this program !!!\n\n"; + printf "Problem- U+%04X - U+%04X range\n", ($i << 3),((($i +1)<< 3)-1); + } + } + } +} + + +if($newidx > 255) +{ + die "We have more than 255 patterns !!! - $newidx\n\n" . + "This program is now broken!!!\n\n\n"; + +} +printf OUT "static PRUint32 gBidiCatPat[$newidx] = {\n"; +for($i = 0 ; $i < $newidx; $i++) +{ + printf OUT " %s, /* $i */\n", $patarray[$i] ; +} +printf OUT "};\n\n"; +$totaldata += $newidx * 4; + +printf OUT "static eBidiCategory GetBidiCat(PRUnichar u)\n{\n"; +printf OUT " PRUint32 pat;\n"; +printf OUT " PRUint16 patidx;\n\n"; +printf OUT " /* Handle blocks which use index table mapping */ \n\n"; +for($t = 1; $t <= $tt; $t++) +{ + $tl = $range[($t-1) * 2]; + $th = $range[($t-1) * 2 + 1]; + if ($tl == 0) { + printf OUT " /* Handle U+%04X to U+%04X */\n", $tl, $th; + printf OUT " if (u<=((PRUnichar)0x%04X)) {\n", $th; + printf OUT " patidx = gBidiCatIdx%d [( u >> 3 )];\n", $t; + } elsif ($th == 0xFFFF) { + printf OUT " /* Handle U+%04X to U+%04X */\n", $tl, $th; + printf OUT " if (((PRUnichar)0x%04X)<=u) {\n", $tl; + printf OUT " patidx = gBidiCatIdx%d [( (u -(PRUnichar) 0x%04X) >> 3 )];\n", $t, $tl; + } else { + printf OUT " /* Handle U+%04X to U+%04X */\n", $tl, $th; + printf OUT " if ((((PRUnichar)0x%04X)<=u)&&(u<=((PRUnichar)0x%04X))) {\n", $tl, $th; + printf OUT " patidx = gBidiCatIdx%d [( (u -(PRUnichar) 0x%04X) >> 3 )];\n", $t, $tl; + } + printf OUT " if (patidx < 0x10)\n"; + printf OUT " return (eBidiCategory)patidx;\n"; + printf OUT " else {\n"; + printf OUT " pat = gBidiCatPat[patidx];\n"; + printf OUT " return (eBidiCategory)((pat >> ((u % 8) * 4)) & 0x0F);\n"; + printf OUT " }\n"; + printf OUT " }\n\n"; +} + +@special = keys(%sh); +$sp = 0; +foreach $s ( sort(@special) ) { + # don't bother to define the special blocks unless they have a different + # value from the default they would be given if they were undefined + unless ($sc{$s} == $map{"L"}) { + unless ($sp++) { + 
%by_value = reverse %map; + printf OUT " /* Handle blocks which share the same category */\n\n"; + } + printf OUT " /* Handle %s block */\n", substr($s, 1); + printf OUT " if((((PRUnichar)0x%s)<=u)&&(u<=((PRUnichar)0x%s))) \n", $sl{$s}, $sh{$s}; + printf OUT " return eBidiCat_$by_value{$sc{$s}}; \n\n"; + } +} + + + +printf OUT " return eBidiCat_L; /* UNDEFINE = L */\n};\n"; + +printf OUT "/* total data size = $totaldata */\n"; +printf OUT "\n#endif /* IBMBIDI */\n"; +print "total = $totaldata\n"; + +###################################################################### +# +# Close files +# +###################################################################### +close(UNIDATA); +close(OUT); + diff --git a/intl/unicharutil/tools/gensymmtable.pl b/intl/unicharutil/tools/gensymmtable.pl new file mode 100644 index 000000000000..8fd6ec42e591 --- /dev/null +++ b/intl/unicharutil/tools/gensymmtable.pl @@ -0,0 +1,138 @@ +#!/usr/local/bin/perl +# +# The contents of this file are subject to the Mozilla Public +# License Version 1.1 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS +# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or +# implied. See the License for the specific language governing +# rights and limitations under the License. +# +# The Original Code is IBM code. +# +# The Initial Developer of the Original Code is IBM. +# Portions created by IBM are +# Copyright (C) International Business Machines +# Corporation, 2000. All Rights Reserved. +# +# Contributor(s): Simon Montagu +# + +# This program generates the header file symmtable.h from the Unicode +# informative data file BidiMirroring.txt. +# See the comments in that file for details of its structure and contents. 
+ +# Process the input file +$ucp = "[0-9a-fA-F]{4}"; # Unicode code point (4 successive hex digits) as a pattern to match +open ( UNICODATA , "< BidiMirroring.txt") + || die "Cannot find BidiMirroring.txt.\ +The file should be avaiable here:\ +http://www.unicode.org/Public/UNIDATA/BidiMirroring.txt\n"; + +while () { + chop; + if (/^($ucp); ($ucp) # (.+)/) { # If the line looks like this pattern + # (example: 0028; 0029 # LEFT PARENTHESIS) + @table[hex($1)]=hex($1) ^ hex($2); # Enter the character XOR its symmetric pair in the table + @isblock[hex(substr($1, 0, 2))]=1; # Remember this block + } + elsif (/^# ($ucp); (.+)/) { # If the line looks like this pattern + # (example: # 2201; COMPLEMENT) + @table[hex($1)]=0xff; # Enter 0xff in the table + @isblock[hex(substr($1, 0, 2))]=2; # Remember this block + } +} +close(UNICODATA); + +# Generate license and header +open ( OUT , "> ../src/symmtable.h") + || die "cannot open output ../src/symmtable.h file"; +$npl = <