зеркало из https://github.com/mozilla/gecko-dev.git
bug 738101 - add support for more Unicode properties. r=smontagu
This commit is contained in:
Родитель
a62d4c83ae
Коммит
c73b93163a
|
@ -54,9 +54,14 @@
|
|||
# - BidiMirroring.txt
|
||||
# - HangulSyllableType.txt
|
||||
# - ReadMe.txt (to record version/date of the UCD)
|
||||
# - Unihan_Variants.txt (from Unihan.zip)
|
||||
# though this may change if we find a need for additional properties.
|
||||
#
|
||||
# The Unicode data files should be together in a single directory.
|
||||
# The Unicode data files listed above should be together in one directory.
|
||||
# We also require the file
|
||||
# http://www.unicode.org/Public/security/latest/xidmodifications.txt
|
||||
# This file should be in a sub-directory "security" immediately below the
|
||||
# directory containing the other Unicode data files.
|
||||
#
|
||||
# (2) Run this tool using a command line of the form
|
||||
#
|
||||
|
@ -74,6 +79,7 @@
|
|||
# in the current directory.
|
||||
|
||||
use strict;
|
||||
use List::Util qw(first);
|
||||
|
||||
if ($#ARGV != 1) {
|
||||
print <<__EOT;
|
||||
|
@ -251,6 +257,41 @@ while (<FH>) {
|
|||
}
|
||||
close FH;
|
||||
|
||||
my %xidmodCode = (
|
||||
'inclusion' => 0,
|
||||
'recommended' => 1,
|
||||
'default-ignorable' => 2,
|
||||
'historic' => 3,
|
||||
'limited-use' => 4,
|
||||
'not-NFKC' => 5,
|
||||
'not-xid' => 6,
|
||||
'obsolete' => 7,
|
||||
'technical' => 8,
|
||||
'not-chars' => 9
|
||||
);
|
||||
|
||||
my %bidicategoryCode = (
|
||||
"L" => "0", # Left-to-Right
|
||||
"R" => "1", # Right-to-Left
|
||||
"EN" => "2", # European Number
|
||||
"ES" => "3", # European Number Separator
|
||||
"ET" => "4", # European Number Terminator
|
||||
"AN" => "5", # Arabic Number
|
||||
"CS" => "6", # Common Number Separator
|
||||
"B" => "7", # Paragraph Separator
|
||||
"S" => "8", # Segment Separator
|
||||
"WS" => "9", # Whitespace
|
||||
"ON" => "10", # Other Neutrals
|
||||
"LRE" => "11", # Left-to-Right Embedding
|
||||
"LRO" => "12", # Left-to-Right Override
|
||||
"AL" => "13", # Right-to-Left Arabic
|
||||
"RLE" => "14", # Right-to-Left Embedding
|
||||
"RLO" => "15", # Right-to-Left Override
|
||||
"PDF" => "16", # Pop Directional Format
|
||||
"NSM" => "17", # Non-Spacing Mark
|
||||
"BN" => "18" # Boundary Neutral
|
||||
);
|
||||
|
||||
# initialize default properties
|
||||
my @script;
|
||||
my @category;
|
||||
|
@ -259,11 +300,27 @@ my @eaw;
|
|||
my @mirror;
|
||||
my @hangul;
|
||||
my @casemap;
|
||||
my @xidmod;
|
||||
my @numericvalue;
|
||||
my @hanVariant;
|
||||
my @bidicategory;
|
||||
for (my $i = 0; $i < 0x110000; ++$i) {
|
||||
$script[$i] = $scriptCode{"UNKNOWN"};
|
||||
$category[$i] = $catCode{"UNASSIGNED"};
|
||||
$combining[$i] = 0;
|
||||
$casemap[$i] = 0;
|
||||
$xidmod[$i] = $xidmodCode{"not-chars"};
|
||||
$numericvalue[$i] = -1;
|
||||
$hanVariant[$i] = 0;
|
||||
$bidicategory[$i] = $bidicategoryCode{"L"};
|
||||
}
|
||||
|
||||
# blocks where the default for bidi category is not L
|
||||
for my $i (0x0600..0x07BF, 0x08A0..0x08FF, 0xFB50..0xFDCF, 0xFDF0..0xFDFF, 0xFE70..0xFEFF, 0x1EE00..0x0001EEFF) {
|
||||
$bidicategory[$i] = $bidicategoryCode{"AL"};
|
||||
}
|
||||
for my $i (0x0590..0x05FF, 0x07C0..0x089F, 0xFB1D..0xFB4F, 0x00010800..0x00010FFF, 0x0001E800..0x0001EDFF, 0x0001EF00..0x0001EFFF) {
|
||||
$bidicategory[$i] = $bidicategoryCode{"R"};
|
||||
}
|
||||
|
||||
my %ucd2hb = (
|
||||
|
@ -328,6 +385,13 @@ while (<FH>) {
|
|||
do {
|
||||
$category[$first] = $catCode{$ucd2hb{$fields[2]}};
|
||||
$combining[$first] = $fields[3];
|
||||
$bidicategory[$first] = $bidicategoryCode{$fields[4]};
|
||||
unless (length($fields[7]) == 0) {
|
||||
$numericvalue[$first] = $fields[7];
|
||||
}
|
||||
if ($fields[1] =~ /CJK/) {
|
||||
# Default every CJK code point in the range to 3 (HVT_AnyHan); use a plain
# element assignment rather than a one-element array slice.
$hanVariant[$first] = 3;
|
||||
}
|
||||
$first++;
|
||||
} while ($first <= $last);
|
||||
} else {
|
||||
|
@ -358,6 +422,13 @@ while (<FH>) {
|
|||
$casemap[$usv] |= $kLowerToUpper;
|
||||
$casemap[$usv] |= ($usv ^ $upper);
|
||||
}
|
||||
$bidicategory[$usv] = $bidicategoryCode{$fields[4]};
|
||||
unless (length($fields[7]) == 0) {
|
||||
$numericvalue[$usv] = $fields[7];
|
||||
}
|
||||
if ($fields[1] =~ /CJK/) {
|
||||
# Default this CJK code point to 3 (HVT_AnyHan); use a plain element
# assignment rather than a one-element array slice.
$hanVariant[$usv] = 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
close FH;
|
||||
|
@ -416,8 +487,9 @@ while (<FH>) {
|
|||
close FH;
|
||||
|
||||
# read BidiMirroring.txt
|
||||
my @distantMirrors = ();
|
||||
my $smallMirrorOffset = 64;
|
||||
my @offsets = ();
|
||||
push @offsets, 0;
|
||||
|
||||
open FH, "< $ARGV[1]/BidiMirroring.txt" or die "can't open UCD file BidiMirroring.txt\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
|
@ -429,13 +501,13 @@ while (<FH>) {
|
|||
s/#.*//;
|
||||
if (m/([0-9A-F]{4,6});\s*([0-9A-F]{4,6})/) {
|
||||
my $mirrorOffset = hex("0x$2") - hex("0x$1");
|
||||
if ($mirrorOffset < $smallMirrorOffset && $mirrorOffset >= -128) {
|
||||
$mirror[hex "0x$1"] = $mirrorOffset;
|
||||
} else {
|
||||
die "too many distant mirror codes\n" if scalar @distantMirrors == 128 - $smallMirrorOffset;
|
||||
$mirror[hex "0x$1"] = $smallMirrorOffset + scalar @distantMirrors;
|
||||
push @distantMirrors, hex("0x$2");
|
||||
# Look up the index of an already-recorded offset; offsets are integers, so
# compare numerically rather than as strings.
my $offsetIndex = first { $offsets[$_] == $mirrorOffset } 0..$#offsets;
|
||||
# `first` yields undef when no index matches. Test definedness explicitly:
# "== undef" compares against 0 and would also be true for a valid index 0.
if (!defined $offsetIndex) {
|
||||
die "too many offset codes\n" if scalar @offsets == 31;
|
||||
push @offsets, $mirrorOffset;
|
||||
$offsetIndex = $#offsets;
|
||||
}
|
||||
$mirror[hex "0x$1"] = $offsetIndex;
|
||||
}
|
||||
}
|
||||
close FH;
|
||||
|
@ -470,6 +542,66 @@ while (<FH>) {
|
|||
}
|
||||
close FH;
|
||||
|
||||
# read xidmodifications.txt
|
||||
open FH, "< $ARGV[1]/security/xidmodifications.txt" or die "can't open UCD file xidmodifications.txt\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
unless (/\xef\xbb\xbf/) {
|
||||
push @versionInfo, $_;
|
||||
}
|
||||
last if /Generated:/;
|
||||
}
|
||||
while (<FH>) {
|
||||
if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+[^ ]+\s+;\s+([^ ]+)/) {
|
||||
my $xidmod = $3;
|
||||
warn "unknown Identifier Modification $xidmod" unless exists $xidmodCode{$xidmod};
|
||||
$xidmod = $xidmodCode{$xidmod};
|
||||
my $start = hex "0x$1";
|
||||
my $end = (defined $2) ? hex "0x$2" : $start;
|
||||
for (my $i = $start; $i <= $end; ++$i) {
|
||||
$xidmod[$i] = $xidmod;
|
||||
}
|
||||
}
|
||||
}
|
||||
close FH;
|
||||
|
||||
open FH, "< $ARGV[1]/Unihan_Variants.txt" or die "can't open UCD file Unihan_Variants.txt (from Unihan.zip)\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
push @versionInfo, $_;
|
||||
last if /Date:/;
|
||||
}
|
||||
my $savedusv = 0;
|
||||
my $hasTC = 0;
|
||||
my $hasSC = 0;
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
if (m/U\+([0-9A-F]{4,6})\s+k([^ ]+)Variant/) {
|
||||
my $usv = hex "0x$1";
|
||||
if ($usv != $savedusv) {
|
||||
unless ($savedusv == 0) {
|
||||
if ($hasTC && !$hasSC) {
|
||||
$hanVariant[$savedusv] = 1;
|
||||
} elsif (!$hasTC && $hasSC) {
|
||||
$hanVariant[$savedusv] = 2;
|
||||
}
|
||||
}
|
||||
$savedusv = $usv;
|
||||
$hasTC = 0;
|
||||
$hasSC = 0;
|
||||
}
|
||||
if ($2 eq "Traditional") {
|
||||
$hasTC = 1;
|
||||
}
|
||||
if ($2 eq "Simplified") {
|
||||
$hasSC = 1;
|
||||
}
|
||||
}
|
||||
}
# The loop above only commits a character's variant type when the *next*
# code point is encountered, so the final character in the file would be
# skipped. Commit it here using the same rules (1 = has only a Traditional
# variant, 2 = has only a Simplified variant; otherwise keep the default).
if ($savedusv != 0) {
    if ($hasTC && !$hasSC) {
        $hanVariant[$savedusv] = 1;
    } elsif (!$hasTC && $hasSC) {
        $hanVariant[$savedusv] = 2;
    }
}
|
||||
close FH;
|
||||
|
||||
my $timestamp = gmtime();
|
||||
|
||||
open DATA_TABLES, "> nsUnicodePropertyData.cpp" or die "unable to open nsUnicodePropertyData.cpp for output";
|
||||
|
@ -532,10 +664,28 @@ $versionInfo
|
|||
*/
|
||||
|
||||
#include "mozilla/StandardInteger.h"
|
||||
#include "harfbuzz/hb-common.h"
|
||||
#include "harfbuzz/hb.h"
|
||||
|
||||
__END
|
||||
|
||||
open HEADER, "> nsUnicodeScriptCodes.h" or die "unable to open nsUnicodeScriptCodes.h for output";
|
||||
|
||||
print HEADER <<__END;
|
||||
$licenseBlock
|
||||
/*
|
||||
* Created on $timestamp from UCD data files with version info:
|
||||
*
|
||||
|
||||
$versionInfo
|
||||
|
||||
*
|
||||
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
|
||||
*/
|
||||
|
||||
#ifndef NS_UNICODE_SCRIPT_CODES
|
||||
#define NS_UNICODE_SCRIPT_CODES
|
||||
__END
|
||||
|
||||
print DATA_TABLES "static const PRUint32 sScriptCodeToTag[] = {\n";
|
||||
for (my $i = 0; $i < scalar @scriptCodeToTag; ++$i) {
|
||||
printf DATA_TABLES " HB_TAG(%s)", $scriptCodeToTag[$i];
|
||||
|
@ -543,56 +693,54 @@ for (my $i = 0; $i < scalar @scriptCodeToTag; ++$i) {
|
|||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
|
||||
sub sprintScript
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("%d,", $script[$usv]);
|
||||
}
|
||||
&genTables("Script", "PRUint8", 10, 6, \&sprintScript, 16);
|
||||
our $totalData = 0;
|
||||
|
||||
sub sprintCC
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("%d,", $combining[$usv]);
|
||||
}
|
||||
&genTables("CClass", "PRUint8", 10, 6, \&sprintCC, 1);
|
||||
|
||||
print DATA_TABLES "static const PRInt32 kSmallMirrorOffset = $smallMirrorOffset;\n";
|
||||
print DATA_TABLES "static const PRUint16 sDistantMirrors[] = {\n";
|
||||
for (my $i = 0; $i < scalar @distantMirrors; ++$i) {
|
||||
printf DATA_TABLES " 0x%04X", $distantMirrors[$i];
|
||||
print DATA_TABLES $i < $#distantMirrors ? ",\n" : "\n";
|
||||
print DATA_TABLES "static const PRInt16 sMirrorOffsets[] = {\n";
|
||||
for (my $i = 0; $i < scalar @offsets; ++$i) {
|
||||
# Emit the offset verbatim; use print so the interpolated value is never
# interpreted as a printf format string.
print DATA_TABLES " $offsets[$i]";
|
||||
print DATA_TABLES $i < $#offsets ? ",\n" : "\n";
|
||||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
|
||||
sub sprintMirror
|
||||
sub sprintCharProps1
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("%d,", $mirror[$usv]);
|
||||
return sprintf("{%d,%d,%d}, ", $mirror[$usv], $hangul[$usv], $combining[$usv]);
|
||||
}
|
||||
&genTables("Mirror", "PRInt8", 9, 7, \&sprintMirror, 0);
|
||||
&genTables("CharProp1", "struct nsCharProps1 {\n unsigned char mMirrorOffsetIndex:5;\n unsigned char mHangulType:3;\n unsigned char mCombiningClass:8;\n};",
|
||||
"nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1);
|
||||
|
||||
sub sprintCatEAW
|
||||
sub sprintCharProps2
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("{%d,%d},", $eaw[$usv], $category[$usv]);
|
||||
return sprintf("{%d,%d,%d,%d,%d,%d},",
|
||||
$script[$usv], $eaw[$usv], $category[$usv],
|
||||
$bidicategory[$usv], $xidmod[$usv], $numericvalue[$usv]);
|
||||
}
|
||||
&genTables("CatEAW", "struct {\n unsigned char mEAW:3;\n unsigned char mCategory:5;\n}",
|
||||
9, 7, \&sprintCatEAW, 16);
|
||||
&genTables("CharProp2", "struct nsCharProps2 {\n unsigned char mScriptCode:8;\n unsigned char mEAW:3;\n unsigned char mCategory:5;\n unsigned char mBidiCategory:5;\n unsigned char mXidmod:4;\n signed char mNumericValue:5;\n unsigned char mHanVariant:2;\n};",
|
||||
"nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1);
|
||||
|
||||
sub sprintHangulType
|
||||
sub sprintHanVariants
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("%d,", $hangul[$usv]);
|
||||
my $baseUsv = shift;
|
||||
my $varShift = 0;
|
||||
my $val = 0;
|
||||
while ($varShift < 8) {
|
||||
$val |= $hanVariant[$baseUsv++] << $varShift;
|
||||
$varShift += 2;
|
||||
}
|
||||
return sprintf("0x%02x,", $val);
|
||||
}
|
||||
&genTables("Hangul", "PRUint8", 10, 6, \&sprintHangulType, 0);
|
||||
&genTables("HanVariant", "", "PRUint8", 9, 7, \&sprintHanVariants, 2, 1, 4);
|
||||
|
||||
sub sprintCasemap
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("0x%08x,", $casemap[$usv]);
|
||||
}
|
||||
&genTables("CaseMap", "PRUint32", 11, 5, \&sprintCasemap, 1);
|
||||
&genTables("CaseMap", "", "PRUint32", 11, 5, \&sprintCasemap, 1, 4, 1);
|
||||
|
||||
print STDERR "Total data = $totalData\n";
|
||||
|
||||
printf DATA_TABLES "const PRUint32 kTitleToUpper = 0x%08x;\n", $kTitleToUpper;
|
||||
printf DATA_TABLES "const PRUint32 kUpperToLower = 0x%08x;\n", $kUpperToLower;
|
||||
|
@ -602,14 +750,14 @@ printf DATA_TABLES "const PRUint32 kCaseMapCharMask = 0x%08x;\n\n", $kCaseMapCha
|
|||
|
||||
sub genTables
|
||||
{
|
||||
my ($prefix, $type, $indexBits, $charBits, $func, $maxPlane) = @_;
|
||||
my ($prefix, $typedef, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;
|
||||
|
||||
print DATA_TABLES "#define k${prefix}MaxPlane $maxPlane\n";
|
||||
print DATA_TABLES "#define k${prefix}IndexBits $indexBits\n";
|
||||
print DATA_TABLES "#define k${prefix}CharBits $charBits\n";
|
||||
|
||||
my $indexLen = 1 << $indexBits;
|
||||
my $dataLen = 1 << $charBits;
|
||||
my $charsPerPage = 1 << $charBits;
|
||||
my %charIndex = ();
|
||||
my %pageMapIndex = ();
|
||||
my @pageMap = ();
|
||||
|
@ -620,8 +768,8 @@ sub genTables
|
|||
# Two zero bytes per page-index slot. The length must be parenthesized:
# "x" and "*" share precedence and associate left, so the unparenthesized
# form evaluated ("\x00" x $indexLen) * 2, which is the number 0.
my $pageMap = "\x00" x ($indexLen * 2);
|
||||
foreach my $page (0 .. $indexLen - 1) {
|
||||
my $charValues = "";
|
||||
foreach my $ch (0 .. $dataLen - 1) {
|
||||
my $usv = $plane * 0x10000 + $page * $dataLen + $ch;
|
||||
for (my $ch = 0; $ch < $charsPerPage; $ch += $charsPerEntry) {
|
||||
my $usv = $plane * 0x10000 + $page * $charsPerPage + $ch;
|
||||
$charValues .= &$func($usv);
|
||||
}
|
||||
chop $charValues;
|
||||
|
@ -659,7 +807,10 @@ sub genTables
|
|||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
|
||||
print DATA_TABLES "static const $type s${prefix}Values[$chCount][$dataLen] = {\n";
|
||||
print HEADER "$typedef\n\n" if $typedef ne '';
|
||||
|
||||
my $pageLen = $charsPerPage / $charsPerEntry;
|
||||
print DATA_TABLES "static const $type s${prefix}Values[$chCount][$pageLen] = {\n";
|
||||
for (my $i = 0; $i < scalar @char; ++$i) {
|
||||
print DATA_TABLES " {";
|
||||
print DATA_TABLES $char[$i];
|
||||
|
@ -667,9 +818,12 @@ sub genTables
|
|||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
|
||||
print STDERR "Data for $prefix = ", $pmCount*$indexLen*$pmBits/8 +
|
||||
$chCount*$dataLen*(($type =~ /32/) ? 4 : 1) +
|
||||
$maxPlane, "\n";
|
||||
my $dataSize = $pmCount * $indexLen * $pmBits/8 +
|
||||
$chCount * $pageLen * $bytesPerEntry +
|
||||
$maxPlane;
|
||||
$totalData += $dataSize;
|
||||
|
||||
print STDERR "Data for $prefix = $dataSize\n";
|
||||
}
|
||||
|
||||
print DATA_TABLES <<__END;
|
||||
|
@ -680,24 +834,6 @@ __END
|
|||
|
||||
close DATA_TABLES;
|
||||
|
||||
open HEADER, "> nsUnicodeScriptCodes.h" or die "unable to open nsUnicodeScriptCodes.h for output";
|
||||
|
||||
print HEADER <<__END;
|
||||
$licenseBlock
|
||||
/*
|
||||
* Created on $timestamp from UCD data files with version info:
|
||||
*
|
||||
|
||||
$versionInfo
|
||||
|
||||
*
|
||||
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
|
||||
*/
|
||||
|
||||
#ifndef NS_UNICODE_SCRIPT_CODES
|
||||
#define NS_UNICODE_SCRIPT_CODES
|
||||
__END
|
||||
|
||||
print HEADER "enum {\n";
|
||||
for (my $i = 0; $i < scalar @scriptCodeToName; ++$i) {
|
||||
print HEADER " MOZ_SCRIPT_", $scriptCodeToName[$i], " = ", $i, ",\n";
|
||||
|
|
|
@ -38,7 +38,6 @@
|
|||
#define HB_DONT_DEFINE_STDINT 1
|
||||
|
||||
#include "nsUnicodeProperties.h"
|
||||
#include "nsUnicodeScriptCodes.h"
|
||||
#include "nsUnicodePropertyData.cpp"
|
||||
|
||||
#include "mozilla/Util.h"
|
||||
|
@ -48,6 +47,53 @@
|
|||
#define UNICODE_BMP_LIMIT 0x10000
|
||||
#define UNICODE_LIMIT 0x110000
|
||||
|
||||
|
||||
nsCharProps1
|
||||
GetCharProps1(PRUint32 aCh)
|
||||
{
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
return sCharProp1Values[sCharProp1Pages[0][aCh >> kCharProp1CharBits]]
|
||||
[aCh & ((1 << kCharProp1CharBits) - 1)];
|
||||
}
|
||||
if (aCh < (kCharProp1MaxPlane + 1) * 0x10000) {
|
||||
return sCharProp1Values[sCharProp1Pages[sCharProp1Planes[(aCh >> 16) - 1]]
|
||||
[(aCh & 0xffff) >> kCharProp1CharBits]]
|
||||
[aCh & ((1 << kCharProp1CharBits) - 1)];
|
||||
}
|
||||
|
||||
// Default values for unassigned
|
||||
nsCharProps1 undefined = {0, // Index to mirrored char offsets
|
||||
0, // Hangul Syllable type
|
||||
0}; // Combining class
|
||||
return undefined;
|
||||
}
|
||||
|
||||
nsCharProps2
|
||||
GetCharProps2(PRUint32 aCh)
|
||||
{
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
return sCharProp2Values[sCharProp2Pages[0][aCh >> kCharProp2CharBits]]
|
||||
[aCh & ((1 << kCharProp2CharBits) - 1)];
|
||||
}
|
||||
if (aCh < (kCharProp2MaxPlane + 1) * 0x10000) {
|
||||
return sCharProp2Values[sCharProp2Pages[sCharProp2Planes[(aCh >> 16) - 1]]
|
||||
[(aCh & 0xffff) >> kCharProp2CharBits]]
|
||||
[aCh & ((1 << kCharProp2CharBits) - 1)];
|
||||
}
|
||||
|
||||
NS_NOTREACHED("Getting CharProps for codepoint outside Unicode range");
|
||||
// Default values for unassigned
|
||||
nsCharProps2 undefined = {
|
||||
MOZ_SCRIPT_UNKNOWN, // Script code
|
||||
0, // East Asian Width
|
||||
HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, // General Category
|
||||
eCharType_LeftToRight, // Bidi Category
|
||||
mozilla::unicode::XIDMOD_NOT_CHARS, // Xidmod
|
||||
-1 // Numeric Value
|
||||
};
|
||||
return undefined;
|
||||
}
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
namespace unicode {
|
||||
|
@ -113,81 +159,7 @@ nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[] = {
|
|||
PRUint32
|
||||
GetMirroredChar(PRUint32 aCh)
|
||||
{
|
||||
// all mirrored chars are in plane 0
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
int v = sMirrorValues[sMirrorPages[0][aCh >> kMirrorCharBits]]
|
||||
[aCh & ((1 << kMirrorCharBits) - 1)];
|
||||
// The mirror value is stored as either an offset (if less than
|
||||
// kSmallMirrorOffset) from the input character code, or as
|
||||
// an index into the sDistantMirrors list. This allows the
|
||||
// mirrored codes to be stored as 8-bit values, as most of them
|
||||
// are references to nearby character codes.
|
||||
if (v < kSmallMirrorOffset) {
|
||||
return aCh + v;
|
||||
}
|
||||
return sDistantMirrors[v - kSmallMirrorOffset];
|
||||
}
|
||||
return aCh;
|
||||
}
|
||||
|
||||
PRUint8
|
||||
GetCombiningClass(PRUint32 aCh)
|
||||
{
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
return sCClassValues[sCClassPages[0][aCh >> kCClassCharBits]]
|
||||
[aCh & ((1 << kCClassCharBits) - 1)];
|
||||
}
|
||||
if (aCh < (kCClassMaxPlane + 1) * 0x10000) {
|
||||
return sCClassValues[sCClassPages[sCClassPlanes[(aCh >> 16) - 1]]
|
||||
[(aCh & 0xffff) >> kCClassCharBits]]
|
||||
[aCh & ((1 << kCClassCharBits) - 1)];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
PRUint8
|
||||
GetGeneralCategory(PRUint32 aCh)
|
||||
{
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
return sCatEAWValues[sCatEAWPages[0][aCh >> kCatEAWCharBits]]
|
||||
[aCh & ((1 << kCatEAWCharBits) - 1)].mCategory;
|
||||
}
|
||||
if (aCh < (kCatEAWMaxPlane + 1) * 0x10000) {
|
||||
return sCatEAWValues[sCatEAWPages[sCatEAWPlanes[(aCh >> 16) - 1]]
|
||||
[(aCh & 0xffff) >> kCatEAWCharBits]]
|
||||
[aCh & ((1 << kCatEAWCharBits) - 1)].mCategory;
|
||||
}
|
||||
return PRUint8(HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED);
|
||||
}
|
||||
|
||||
PRUint8
|
||||
GetEastAsianWidth(PRUint32 aCh)
|
||||
{
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
return sCatEAWValues[sCatEAWPages[0][aCh >> kCatEAWCharBits]]
|
||||
[aCh & ((1 << kCatEAWCharBits) - 1)].mEAW;
|
||||
}
|
||||
if (aCh < (kCatEAWMaxPlane + 1) * 0x10000) {
|
||||
return sCatEAWValues[sCatEAWPages[sCatEAWPlanes[(aCh >> 16) - 1]]
|
||||
[(aCh & 0xffff) >> kCatEAWCharBits]]
|
||||
[aCh & ((1 << kCatEAWCharBits) - 1)].mEAW;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
PRInt32
|
||||
GetScriptCode(PRUint32 aCh)
|
||||
{
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
return sScriptValues[sScriptPages[0][aCh >> kScriptCharBits]]
|
||||
[aCh & ((1 << kScriptCharBits) - 1)];
|
||||
}
|
||||
if (aCh < (kScriptMaxPlane + 1) * 0x10000) {
|
||||
return sScriptValues[sScriptPages[sScriptPlanes[(aCh >> 16) - 1]]
|
||||
[(aCh & 0xffff) >> kScriptCharBits]]
|
||||
[aCh & ((1 << kScriptCharBits) - 1)];
|
||||
}
|
||||
return MOZ_SCRIPT_UNKNOWN;
|
||||
return aCh + sMirrorOffsets[GetCharProps1(aCh).mMirrorOffsetIndex];
|
||||
}
|
||||
|
||||
PRUint32
|
||||
|
@ -200,17 +172,6 @@ GetScriptTagForCode(PRInt32 aScriptCode)
|
|||
return sScriptCodeToTag[aScriptCode];
|
||||
}
|
||||
|
||||
HSType
|
||||
GetHangulSyllableType(PRUint32 aCh)
|
||||
{
|
||||
// all Hangul chars are in plane 0
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
return HSType(sHangulValues[sHangulPages[0][aCh >> kHangulCharBits]]
|
||||
[aCh & ((1 << kHangulCharBits) - 1)]);
|
||||
}
|
||||
return HST_NONE;
|
||||
}
|
||||
|
||||
static inline PRUint32
|
||||
GetCaseMapValue(PRUint32 aCh)
|
||||
{
|
||||
|
@ -275,6 +236,25 @@ GetTitlecaseForAll(PRUint32 aCh)
|
|||
return aCh;
|
||||
}
|
||||
|
||||
HanVariantType
|
||||
GetHanVariant(PRUint32 aCh)
|
||||
{
|
||||
// In the sHanVariantValues array, data for 4 successive characters
|
||||
// (2 bits each) is packed into each PRUint8 entry, with the value
|
||||
// for the lowest character stored in the least significant bits.
|
||||
PRUint8 v = 0;
|
||||
if (aCh < UNICODE_BMP_LIMIT) {
|
||||
v = sHanVariantValues[sHanVariantPages[0][aCh >> kHanVariantCharBits]]
|
||||
[(aCh & ((1 << kHanVariantCharBits) - 1)) >> 2];
|
||||
} else if (aCh < (kHanVariantMaxPlane + 1) * 0x10000) {
|
||||
v = sHanVariantValues[sHanVariantPages[sHanVariantPlanes[(aCh >> 16) - 1]]
|
||||
[(aCh & 0xffff) >> kHanVariantCharBits]]
|
||||
[(aCh & ((1 << kHanVariantCharBits) - 1)) >> 2];
|
||||
}
|
||||
// extract the appropriate 2-bit field from the value
|
||||
return HanVariantType((v >> ((aCh & 3) * 2)) & 3);
|
||||
}
|
||||
|
||||
bool
|
||||
IsClusterExtender(PRUint32 aCh, PRUint8 aCategory)
|
||||
{
|
||||
|
|
|
@ -39,7 +39,12 @@
|
|||
#define NS_UNICODEPROPERTIES_H
|
||||
|
||||
#include "prtypes.h"
|
||||
#include "nsBidiUtils.h"
|
||||
#include "nsIUGenCategory.h"
|
||||
#include "nsUnicodeScriptCodes.h"
|
||||
|
||||
nsCharProps1 GetCharProps1(PRUint32 aCh);
|
||||
nsCharProps2 GetCharProps2(PRUint32 aCh);
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
|
@ -49,38 +54,93 @@ extern nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[];
|
|||
|
||||
PRUint32 GetMirroredChar(PRUint32 aCh);
|
||||
|
||||
PRUint8 GetCombiningClass(PRUint32 aCh);
|
||||
inline PRUint8 GetCombiningClass(PRUint32 aCh) {
|
||||
return GetCharProps1(aCh).mCombiningClass;
|
||||
}
|
||||
|
||||
// returns the detailed General Category in terms of HB_UNICODE_* values
|
||||
PRUint8 GetGeneralCategory(PRUint32 aCh);
|
||||
inline PRUint8 GetGeneralCategory(PRUint32 aCh) {
|
||||
return GetCharProps2(aCh).mCategory;
|
||||
}
|
||||
|
||||
// returns the simplified Gen Category as defined in nsIUGenCategory
|
||||
inline nsIUGenCategory::nsUGenCategory GetGenCategory(PRUint32 aCh) {
|
||||
return sDetailedToGeneralCategory[GetGeneralCategory(aCh)];
|
||||
}
|
||||
|
||||
PRUint8 GetEastAsianWidth(PRUint32 aCh);
|
||||
inline PRUint8 GetEastAsianWidth(PRUint32 aCh) {
|
||||
return GetCharProps2(aCh).mEAW;
|
||||
}
|
||||
|
||||
PRInt32 GetScriptCode(PRUint32 aCh);
|
||||
inline PRUint8 GetScriptCode(PRUint32 aCh) {
|
||||
return GetCharProps2(aCh).mScriptCode;
|
||||
}
|
||||
|
||||
PRUint32 GetScriptTagForCode(PRInt32 aScriptCode);
|
||||
|
||||
inline nsCharType GetBidiCat(PRUint32 aCh) {
|
||||
return nsCharType(GetCharProps2(aCh).mBidiCategory);
|
||||
}
|
||||
|
||||
enum XidmodType {
|
||||
XIDMOD_INCLUSION,
|
||||
XIDMOD_RECOMMENDED,
|
||||
XIDMOD_DEFAULT_IGNORABLE,
|
||||
XIDMOD_HISTORIC,
|
||||
XIDMOD_LIMITED_USE,
|
||||
XIDMOD_NOT_NFKC,
|
||||
XIDMOD_NOT_XID,
|
||||
XIDMOD_OBSOLETE,
|
||||
XIDMOD_TECHNICAL,
|
||||
XIDMOD_NOT_CHARS
|
||||
};
|
||||
|
||||
inline XidmodType GetIdentifierModification(PRUint32 aCh) {
|
||||
return XidmodType(GetCharProps2(aCh).mXidmod);
|
||||
}
|
||||
|
||||
inline bool IsRestrictedForIdentifiers(PRUint32 aCh) {
|
||||
XidmodType xm = GetIdentifierModification(aCh);
|
||||
return (xm > XIDMOD_RECOMMENDED);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the numeric value of the character. The value returned is the value
|
||||
* of field 7 of UnicodeData.txt (the digit value, 0-9), or -1 if field 7 is empty.
|
||||
* To restrict to decimal digits, the caller should also check whether
|
||||
* GetGeneralCategory returns HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER
|
||||
*/
|
||||
inline PRInt8 GetNumericValue(PRUint32 aCh) {
|
||||
return GetCharProps2(aCh).mNumericValue;
|
||||
}
|
||||
|
||||
enum HanVariantType {
|
||||
HVT_NotHan = 0x0,
|
||||
HVT_SimplifiedOnly = 0x1,
|
||||
HVT_TraditionalOnly = 0x2,
|
||||
HVT_AnyHan = 0x3
|
||||
};
|
||||
|
||||
HanVariantType GetHanVariant(PRUint32 aCh);
|
||||
|
||||
bool IsClusterExtender(PRUint32 aCh, PRUint8 aCategory);
|
||||
|
||||
inline bool IsClusterExtender(PRUint32 aCh) {
|
||||
return IsClusterExtender(aCh, GetGeneralCategory(aCh));
|
||||
return IsClusterExtender(aCh, GetGeneralCategory(aCh));
|
||||
}
|
||||
|
||||
enum HSType {
|
||||
HST_NONE = 0x00,
|
||||
HST_L = 0x01,
|
||||
HST_V = 0x02,
|
||||
HST_T = 0x04,
|
||||
HST_LV = 0x03,
|
||||
HST_LVT = 0x07
|
||||
HST_NONE = 0x00,
|
||||
HST_L = 0x01,
|
||||
HST_V = 0x02,
|
||||
HST_T = 0x04,
|
||||
HST_LV = 0x03,
|
||||
HST_LVT = 0x07
|
||||
};
|
||||
|
||||
HSType GetHangulSyllableType(PRUint32 aCh);
|
||||
inline HSType GetHangulSyllableType(PRUint32 aCh) {
|
||||
return HSType(GetCharProps1(aCh).mHangulType);
|
||||
}
|
||||
|
||||
// Case mappings for the full Unicode range;
|
||||
// note that it may be worth testing for ASCII chars and taking
|
||||
|
@ -91,13 +151,13 @@ PRUint32 GetTitlecaseForLower(PRUint32 aCh); // maps LC to titlecase, UC unchang
|
|||
PRUint32 GetTitlecaseForAll(PRUint32 aCh); // maps both UC and LC to titlecase
|
||||
|
||||
enum ShapingType {
|
||||
SHAPING_DEFAULT = 0x0001,
|
||||
SHAPING_ARABIC = 0x0002,
|
||||
SHAPING_HEBREW = 0x0004,
|
||||
SHAPING_HANGUL = 0x0008,
|
||||
SHAPING_MONGOLIAN = 0x0010,
|
||||
SHAPING_INDIC = 0x0020,
|
||||
SHAPING_THAI = 0x0040
|
||||
SHAPING_DEFAULT = 0x0001,
|
||||
SHAPING_ARABIC = 0x0002,
|
||||
SHAPING_HEBREW = 0x0004,
|
||||
SHAPING_HANGUL = 0x0008,
|
||||
SHAPING_MONGOLIAN = 0x0010,
|
||||
SHAPING_INDIC = 0x0020,
|
||||
SHAPING_THAI = 0x0040
|
||||
};
|
||||
|
||||
PRInt32 ScriptShapingType(PRInt32 aScriptCode);
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -41,7 +41,7 @@
|
|||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/*
|
||||
* Created on Mon Apr 23 14:51:01 2012 from UCD data files with version info:
|
||||
* Created on Mon Apr 23 20:03:29 2012 from UCD data files with version info:
|
||||
*
|
||||
|
||||
# Date: 2012-01-26, 22:03:00 GMT [KW]
|
||||
|
@ -70,12 +70,36 @@ for the Unicode Character Database (UCD) for Unicode 6.1.0.
|
|||
# HangulSyllableType-6.1.0.txt
|
||||
# Date: 2011-08-25, 00:02:18 GMT [MD]
|
||||
|
||||
# File: xidmodifications.txt
|
||||
# Version: 2.1
|
||||
# Generated: 2010-04-13, 01:33:09 GMT
|
||||
|
||||
#
|
||||
# Unihan_Variants.txt
|
||||
# Date: 2011-08-08 22:10:53 GMT [JHJ]
|
||||
|
||||
*
|
||||
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
|
||||
*/
|
||||
|
||||
#ifndef NS_UNICODE_SCRIPT_CODES
|
||||
#define NS_UNICODE_SCRIPT_CODES
|
||||
struct nsCharProps1 {
|
||||
unsigned char mMirrorOffsetIndex:5;
|
||||
unsigned char mHangulType:3;
|
||||
unsigned char mCombiningClass:8;
|
||||
};
|
||||
|
||||
struct nsCharProps2 {
|
||||
unsigned char mScriptCode:8;
|
||||
unsigned char mEAW:3;
|
||||
unsigned char mCategory:5;
|
||||
unsigned char mBidiCategory:5;
|
||||
unsigned char mXidmod:4;
|
||||
signed char mNumericValue:5;
|
||||
unsigned char mHanVariant:2;
|
||||
};
|
||||
|
||||
enum {
|
||||
MOZ_SCRIPT_COMMON = 0,
|
||||
MOZ_SCRIPT_INHERITED = 1,
|
||||
|
|
Загрузка…
Ссылка в новой задаче