bug 738101 - add support for more Unicode properties. r=smontagu

This commit is contained in:
Jonathan Kew 2012-04-23 07:20:11 -07:00
Родитель a62d4c83ae
Коммит c73b93163a
5 изменённых файлов: 1528 добавлений и 825 удалений

Просмотреть файл

@ -54,9 +54,14 @@
# - BidiMirroring.txt
# - HangulSyllableType.txt
# - ReadMe.txt (to record version/date of the UCD)
# - Unihan_Variants.txt (from Unihan.zip)
# though this may change if we find a need for additional properties.
#
# The Unicode data files should be together in a single directory.
# The Unicode data files listed above should be together in one directory.
# We also require the file
# http://www.unicode.org/Public/security/latest/xidmodifications.txt
# This file should be in a sub-directory "security" immediately below the
# directory containing the other Unicode data files.
#
# (2) Run this tool using a command line of the form
#
@ -74,6 +79,7 @@
# in the current directory.
use strict;
use List::Util qw(first);
if ($#ARGV != 1) {
print <<__EOT;
@ -251,6 +257,41 @@ while (<FH>) {
}
close FH;
# Numeric codes for the identifier-modification names found in
# xidmodifications.txt. A name's code is its 0-based position in the list
# below, so the order is significant: it mirrors the order of the XidmodType
# enumerators (XIDMOD_INCLUSION .. XIDMOD_NOT_CHARS) in nsUnicodeProperties.h.
my %xidmodCode;
{
    my @xidmodNames = (
        'inclusion',            # 0
        'recommended',          # 1
        'default-ignorable',    # 2
        'historic',             # 3
        'limited-use',          # 4
        'not-NFKC',             # 5
        'not-xid',              # 6
        'obsolete',             # 7
        'technical',            # 8
        'not-chars',            # 9
    );
    @xidmodCode{@xidmodNames} = (0 .. $#xidmodNames);
}
# Numeric codes for the bidi class abbreviations. A class's code is its
# 0-based position in the list below; the codes are stored as strings.
my %bidicategoryCode;
{
    my @bidiClasses = (
        "L",    #  0: Left-to-Right
        "R",    #  1: Right-to-Left
        "EN",   #  2: European Number
        "ES",   #  3: European Number Separator
        "ET",   #  4: European Number Terminator
        "AN",   #  5: Arabic Number
        "CS",   #  6: Common Number Separator
        "B",    #  7: Paragraph Separator
        "S",    #  8: Segment Separator
        "WS",   #  9: Whitespace
        "ON",   # 10: Other Neutrals
        "LRE",  # 11: Left-to-Right Embedding
        "LRO",  # 12: Left-to-Right Override
        "AL",   # 13: Right-to-Left Arabic
        "RLE",  # 14: Right-to-Left Embedding
        "RLO",  # 15: Right-to-Left Override
        "PDF",  # 16: Pop Directional Format
        "NSM",  # 17: Non-Spacing Mark
        "BN",   # 18: Boundary Neutral
    );
    @bidicategoryCode{@bidiClasses} = map { "$_" } 0 .. $#bidiClasses;
}
# initialize default properties
my @script;
my @category;
@ -259,11 +300,27 @@ my @eaw;
my @mirror;
my @hangul;
my @casemap;
my @xidmod;
my @numericvalue;
my @hanVariant;
my @bidicategory;
# Give every code point (0 .. 0x10FFFF) its default value for each property.
foreach my $usv (0 .. 0x10FFFF) {
    $script[$usv]       = $scriptCode{"UNKNOWN"};
    $category[$usv]     = $catCode{"UNASSIGNED"};
    $combining[$usv]    = 0;
    $casemap[$usv]      = 0;
    $xidmod[$usv]       = $xidmodCode{"not-chars"};
    $numericvalue[$usv] = -1;
    $hanVariant[$usv]   = 0;
    $bidicategory[$usv] = $bidicategoryCode{"L"};
}
# Code point ranges whose default bidi category is not L. Each entry is
# [ bidi class, first code point, last code point ]; the ranges do not overlap.
{
    my @nonLDefaults = (
        [ "AL", 0x0600,  0x07BF  ],
        [ "AL", 0x08A0,  0x08FF  ],
        [ "AL", 0xFB50,  0xFDCF  ],
        [ "AL", 0xFDF0,  0xFDFF  ],
        [ "AL", 0xFE70,  0xFEFF  ],
        [ "AL", 0x1EE00, 0x1EEFF ],
        [ "R",  0x0590,  0x05FF  ],
        [ "R",  0x07C0,  0x089F  ],
        [ "R",  0xFB1D,  0xFB4F  ],
        [ "R",  0x10800, 0x10FFF ],
        [ "R",  0x1E800, 0x1EDFF ],
        [ "R",  0x1EF00, 0x1EFFF ],
    );
    foreach my $range (@nonLDefaults) {
        my ($class, $lo, $hi) = @$range;
        foreach my $usv ($lo .. $hi) {
            $bidicategory[$usv] = $bidicategoryCode{$class};
        }
    }
}
my %ucd2hb = (
@ -328,6 +385,13 @@ while (<FH>) {
do {
    # Copy the properties of the current @fields record to code point $first,
    # then advance; the loop repeats until $last has been handled.
    $category[$first] = $catCode{$ucd2hb{$fields[2]}};
    $combining[$first] = $fields[3];
    $bidicategory[$first] = $bidicategoryCode{$fields[4]};
    # An empty field 7 leaves the default numeric value in place.
    unless (length($fields[7]) == 0) {
        $numericvalue[$first] = $fields[7];
    }
    # Records whose name (field 1) contains "CJK" get Han-variant code 3.
    if ($fields[1] =~ /CJK/) {
        # Scalar element assignment ($), not a one-element array slice (@).
        $hanVariant[$first] = 3;
    }
    $first++;
} while ($first <= $last);
} else {
@ -358,6 +422,13 @@ while (<FH>) {
$casemap[$usv] |= $kLowerToUpper;
$casemap[$usv] |= ($usv ^ $upper);
}
# Record the bidi category (field 4) for this code point.
$bidicategory[$usv] = $bidicategoryCode{$fields[4]};
# An empty field 7 leaves the default numeric value in place.
unless (length($fields[7]) == 0) {
    $numericvalue[$usv] = $fields[7];
}
# Records whose name (field 1) contains "CJK" get Han-variant code 3.
if ($fields[1] =~ /CJK/) {
    # Scalar element assignment ($), not a one-element array slice (@).
    $hanVariant[$usv] = 3;
}
}
}
close FH;
@ -416,8 +487,9 @@ while (<FH>) {
close FH;
# read BidiMirroring.txt
my @distantMirrors = ();
my $smallMirrorOffset = 64;
my @offsets = ();
push @offsets, 0;
open FH, "< $ARGV[1]/BidiMirroring.txt" or die "can't open UCD file BidiMirroring.txt\n";
push @versionInfo, "";
while (<FH>) {
@ -429,13 +501,13 @@ while (<FH>) {
s/#.*//;
if (m/([0-9A-F]{4,6});\s*([0-9A-F]{4,6})/) {
my $mirrorOffset = hex("0x$2") - hex("0x$1");
if ($mirrorOffset < $smallMirrorOffset && $mirrorOffset >= -128) {
$mirror[hex "0x$1"] = $mirrorOffset;
} else {
die "too many distant mirror codes\n" if scalar @distantMirrors == 128 - $smallMirrorOffset;
$mirror[hex "0x$1"] = $smallMirrorOffset + scalar @distantMirrors;
push @distantMirrors, hex("0x$2");
# Look for an existing slot in @offsets that already holds this offset.
my $offsetIndex = first { $offsets[$_] == $mirrorOffset } 0..$#offsets;
# first() returns undef when nothing matches. Test with defined() rather than
# "== undef", which compares numerically against 0 and would therefore treat
# a match at index 0 as "not found".
if (!defined $offsetIndex) {
    die "too many offset codes\n" if scalar @offsets == 31;
    push @offsets, $mirrorOffset;
    $offsetIndex = $#offsets;
}
# Store the index of the offset, not the offset itself, for this character.
$mirror[hex "0x$1"] = $offsetIndex;
}
}
close FH;
@ -470,6 +542,66 @@ while (<FH>) {
}
close FH;
# read xidmodifications.txt
open FH, "<", "$ARGV[1]/security/xidmodifications.txt" or die "can't open UCD file xidmodifications.txt\n";
push @versionInfo, "";
# Copy the file header, up to and including the "Generated:" line, into the
# version info; a line carrying the UTF-8 byte order mark is left out.
while (<FH>) {
    chomp;
    unless (/\xef\xbb\xbf/) {
        push @versionInfo, $_;
    }
    last if /Generated:/;
}
# Each data line has the form "XXXX[..YYYY] ; <status> ; <modification>".
# The optional "..YYYY" part occurs at most once, and the modification name
# ends at the first whitespace character of any kind.
while (<FH>) {
    if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?\s+;\s+[^ ]+\s+;\s+(\S+)/) {
        my ($startHex, $endHex, $xidmodName) = ($1, $2, $3);
        # An unrecognised name has no code. Storing undef would later be
        # printed as 0 (the code for 'inclusion'), so skip the line and leave
        # the affected code points at their existing values instead.
        unless (exists $xidmodCode{$xidmodName}) {
            warn "unknown Identifier Modification $xidmodName";
            next;
        }
        my $code = $xidmodCode{$xidmodName};
        my $start = hex "0x$startHex";
        my $end = (defined $endHex) ? hex "0x$endHex" : $start;
        for (my $i = $start; $i <= $end; ++$i) {
            $xidmod[$i] = $code;
        }
    }
}
close FH;
# read Unihan_Variants.txt
open FH, "<", "$ARGV[1]/Unihan_Variants.txt" or die "can't open UCD file Unihan_Variants.txt (from Unihan.zip)\n";
push @versionInfo, "";
# Copy the file header, up to and including the "Date:" line, into the
# version info.
while (<FH>) {
    chomp;
    push @versionInfo, $_;
    last if /Date:/;
}
# Lines for one code point are consecutive, so accumulate which variant kinds
# have been seen for the current code point ($savedusv; 0 means "none yet")
# and record the result once all of its lines have been read.
my $savedusv = 0;
my $hasTC = 0;
my $hasSC = 0;
# Store the Han-variant code for $savedusv: 1 when only a Traditional variant
# was listed, 2 when only a Simplified variant was listed. Any other
# combination leaves the existing value untouched.
my $recordHanVariant = sub {
    return if $savedusv == 0;
    if ($hasTC && !$hasSC) {
        $hanVariant[$savedusv] = 1;
    } elsif (!$hasTC && $hasSC) {
        $hanVariant[$savedusv] = 2;
    }
};
while (<FH>) {
    chomp;
    if (m/U\+([0-9A-F]{4,6})\s+k([^ ]+)Variant/) {
        my $usv = hex("0x$1");
        my $variantKind = $2;
        if ($usv != $savedusv) {
            # A new code point starts: flush the previous one and reset.
            $recordHanVariant->();
            $savedusv = $usv;
            $hasTC = 0;
            $hasSC = 0;
        }
        if ($variantKind eq "Traditional") {
            $hasTC = 1;
        }
        if ($variantKind eq "Simplified") {
            $hasSC = 1;
        }
    }
}
# Flush the final code point; without this the last character in the file
# would never have its variant information recorded.
$recordHanVariant->();
close FH;
my $timestamp = gmtime();
open DATA_TABLES, "> nsUnicodePropertyData.cpp" or die "unable to open nsUnicodePropertyData.cpp for output";
@ -532,10 +664,28 @@ $versionInfo
*/
#include "mozilla/StandardInteger.h"
#include "harfbuzz/hb-common.h"
#include "harfbuzz/hb.h"
__END
open HEADER, "> nsUnicodeScriptCodes.h" or die "unable to open nsUnicodeScriptCodes.h for output";
print HEADER <<__END;
$licenseBlock
/*
* Created on $timestamp from UCD data files with version info:
*
$versionInfo
*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/
#ifndef NS_UNICODE_SCRIPT_CODES
#define NS_UNICODE_SCRIPT_CODES
__END
print DATA_TABLES "static const PRUint32 sScriptCodeToTag[] = {\n";
for (my $i = 0; $i < scalar @scriptCodeToTag; ++$i) {
printf DATA_TABLES " HB_TAG(%s)", $scriptCodeToTag[$i];
@ -543,56 +693,54 @@ for (my $i = 0; $i < scalar @scriptCodeToTag; ++$i) {
}
print DATA_TABLES "};\n\n";
sub sprintScript
{
my $usv = shift;
return sprintf("%d,", $script[$usv]);
}
&genTables("Script", "PRUint8", 10, 6, \&sprintScript, 16);
our $totalData = 0;
sub sprintCC
{
my $usv = shift;
return sprintf("%d,", $combining[$usv]);
}
&genTables("CClass", "PRUint8", 10, 6, \&sprintCC, 1);
print DATA_TABLES "static const PRInt32 kSmallMirrorOffset = $smallMirrorOffset;\n";
print DATA_TABLES "static const PRUint16 sDistantMirrors[] = {\n";
for (my $i = 0; $i < scalar @distantMirrors; ++$i) {
printf DATA_TABLES " 0x%04X", $distantMirrors[$i];
print DATA_TABLES $i < $#distantMirrors ? ",\n" : "\n";
print DATA_TABLES "static const PRInt16 sMirrorOffsets[] = {\n";
for (my $i = 0; $i < scalar @offsets; ++$i) {
printf DATA_TABLES " $offsets[$i]";
print DATA_TABLES $i < $#offsets ? ",\n" : "\n";
}
print DATA_TABLES "};\n\n";
sub sprintMirror
sub sprintCharProps1
{
my $usv = shift;
return sprintf("%d,", $mirror[$usv]);
return sprintf("{%d,%d,%d}, ", $mirror[$usv], $hangul[$usv], $combining[$usv]);
}
&genTables("Mirror", "PRInt8", 9, 7, \&sprintMirror, 0);
&genTables("CharProp1", "struct nsCharProps1 {\n unsigned char mMirrorOffsetIndex:5;\n unsigned char mHangulType:3;\n unsigned char mCombiningClass:8;\n};",
"nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1);
sub sprintCatEAW
sub sprintCharProps2
{
my $usv = shift;
return sprintf("{%d,%d},", $eaw[$usv], $category[$usv]);
return sprintf("{%d,%d,%d,%d,%d,%d},",
$script[$usv], $eaw[$usv], $category[$usv],
$bidicategory[$usv], $xidmod[$usv], $numericvalue[$usv]);
}
&genTables("CatEAW", "struct {\n unsigned char mEAW:3;\n unsigned char mCategory:5;\n}",
9, 7, \&sprintCatEAW, 16);
&genTables("CharProp2", "struct nsCharProps2 {\n unsigned char mScriptCode:8;\n unsigned char mEAW:3;\n unsigned char mCategory:5;\n unsigned char mBidiCategory:5;\n unsigned char mXidmod:4;\n signed char mNumericValue:5;\n unsigned char mHanVariant:2;\n};",
"nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1);
sub sprintHangulType
sub sprintHanVariants
{
my $usv = shift;
return sprintf("%d,", $hangul[$usv]);
my $baseUsv = shift;
my $varShift = 0;
my $val = 0;
while ($varShift < 8) {
$val |= $hanVariant[$baseUsv++] << $varShift;
$varShift += 2;
}
return sprintf("0x%02x,", $val);
}
&genTables("Hangul", "PRUint8", 10, 6, \&sprintHangulType, 0);
&genTables("HanVariant", "", "PRUint8", 9, 7, \&sprintHanVariants, 2, 1, 4);
# Format the @casemap entry for one code point as "0x" followed by eight
# lower-case hex digits and a trailing comma.
sub sprintCasemap
{
    my ($codepoint) = @_;
    my $entry = $casemap[$codepoint];
    return sprintf("0x%08x,", $entry);
}
&genTables("CaseMap", "PRUint32", 11, 5, \&sprintCasemap, 1);
&genTables("CaseMap", "", "PRUint32", 11, 5, \&sprintCasemap, 1, 4, 1);
print STDERR "Total data = $totalData\n";
printf DATA_TABLES "const PRUint32 kTitleToUpper = 0x%08x;\n", $kTitleToUpper;
printf DATA_TABLES "const PRUint32 kUpperToLower = 0x%08x;\n", $kUpperToLower;
@ -602,14 +750,14 @@ printf DATA_TABLES "const PRUint32 kCaseMapCharMask = 0x%08x;\n\n", $kCaseMapCha
sub genTables
{
my ($prefix, $type, $indexBits, $charBits, $func, $maxPlane) = @_;
my ($prefix, $typedef, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;
print DATA_TABLES "#define k${prefix}MaxPlane $maxPlane\n";
print DATA_TABLES "#define k${prefix}IndexBits $indexBits\n";
print DATA_TABLES "#define k${prefix}CharBits $charBits\n";
my $indexLen = 1 << $indexBits;
my $dataLen = 1 << $charBits;
my $charsPerPage = 1 << $charBits;
my %charIndex = ();
my %pageMapIndex = ();
my @pageMap = ();
@ -620,8 +768,8 @@ sub genTables
my $pageMap = "\x00" x $indexLen * 2;
foreach my $page (0 .. $indexLen - 1) {
my $charValues = "";
foreach my $ch (0 .. $dataLen - 1) {
my $usv = $plane * 0x10000 + $page * $dataLen + $ch;
for (my $ch = 0; $ch < $charsPerPage; $ch += $charsPerEntry) {
my $usv = $plane * 0x10000 + $page * $charsPerPage + $ch;
$charValues .= &$func($usv);
}
chop $charValues;
@ -659,7 +807,10 @@ sub genTables
}
print DATA_TABLES "};\n\n";
print DATA_TABLES "static const $type s${prefix}Values[$chCount][$dataLen] = {\n";
print HEADER "$typedef\n\n" if $typedef ne '';
my $pageLen = $charsPerPage / $charsPerEntry;
print DATA_TABLES "static const $type s${prefix}Values[$chCount][$pageLen] = {\n";
for (my $i = 0; $i < scalar @char; ++$i) {
print DATA_TABLES " {";
print DATA_TABLES $char[$i];
@ -667,9 +818,12 @@ sub genTables
}
print DATA_TABLES "};\n\n";
print STDERR "Data for $prefix = ", $pmCount*$indexLen*$pmBits/8 +
$chCount*$dataLen*(($type =~ /32/) ? 4 : 1) +
$maxPlane, "\n";
my $dataSize = $pmCount * $indexLen * $pmBits/8 +
$chCount * $pageLen * $bytesPerEntry +
$maxPlane;
$totalData += $dataSize;
print STDERR "Data for $prefix = $dataSize\n";
}
print DATA_TABLES <<__END;
@ -680,24 +834,6 @@ __END
close DATA_TABLES;
open HEADER, "> nsUnicodeScriptCodes.h" or die "unable to open nsUnicodeScriptCodes.h for output";
print HEADER <<__END;
$licenseBlock
/*
* Created on $timestamp from UCD data files with version info:
*
$versionInfo
*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/
#ifndef NS_UNICODE_SCRIPT_CODES
#define NS_UNICODE_SCRIPT_CODES
__END
print HEADER "enum {\n";
for (my $i = 0; $i < scalar @scriptCodeToName; ++$i) {
print HEADER " MOZ_SCRIPT_", $scriptCodeToName[$i], " = ", $i, ",\n";

Просмотреть файл

@ -38,7 +38,6 @@
#define HB_DONT_DEFINE_STDINT 1
#include "nsUnicodeProperties.h"
#include "nsUnicodeScriptCodes.h"
#include "nsUnicodePropertyData.cpp"
#include "mozilla/Util.h"
@ -48,6 +47,53 @@
#define UNICODE_BMP_LIMIT 0x10000
#define UNICODE_LIMIT 0x110000
// Look up the nsCharProps1 record (mirror offset index, Hangul syllable type,
// combining class) for aCh in the generated sCharProp1* tables. Code points
// above the planes covered by the tables get an all-zero record.
nsCharProps1
GetCharProps1(PRUint32 aCh)
{
    const PRUint32 offsetInPage = aCh & ((1 << kCharProp1CharBits) - 1);
    const PRUint32 pageInPlane = (aCh & 0xffff) >> kCharProp1CharBits;

    PRUint32 planeSlot;
    if (aCh < UNICODE_BMP_LIMIT) {
        // The BMP always uses the first row of the page table.
        planeSlot = 0;
    } else if (aCh < (kCharProp1MaxPlane + 1) * 0x10000) {
        // Supplementary planes are mapped through sCharProp1Planes.
        planeSlot = sCharProp1Planes[(aCh >> 16) - 1];
    } else {
        // Default values for unassigned
        nsCharProps1 undefined = {0,  // Index to mirrored char offsets
                                  0,  // Hangul Syllable type
                                  0}; // Combining class
        return undefined;
    }

    return sCharProp1Values[sCharProp1Pages[planeSlot][pageInPlane]]
                           [offsetInPage];
}
// Look up the nsCharProps2 record (script, East Asian width, general
// category, bidi category, xidmod, numeric value) for aCh in the generated
// sCharProp2* tables. A code point above the planes covered by the tables
// triggers NS_NOTREACHED and yields the defaults used for unassigned
// characters.
nsCharProps2
GetCharProps2(PRUint32 aCh)
{
    const PRUint32 offsetInPage = aCh & ((1 << kCharProp2CharBits) - 1);
    const PRUint32 pageInPlane = (aCh & 0xffff) >> kCharProp2CharBits;

    PRUint32 planeSlot;
    if (aCh < UNICODE_BMP_LIMIT) {
        // The BMP always uses the first row of the page table.
        planeSlot = 0;
    } else if (aCh < (kCharProp2MaxPlane + 1) * 0x10000) {
        // Supplementary planes are mapped through sCharProp2Planes.
        planeSlot = sCharProp2Planes[(aCh >> 16) - 1];
    } else {
        NS_NOTREACHED("Getting CharProps for codepoint outside Unicode range");
        // Default values for unassigned
        nsCharProps2 undefined = {
            MOZ_SCRIPT_UNKNOWN,                      // Script code
            0,                                       // East Asian Width
            HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,  // General Category
            eCharType_LeftToRight,                   // Bidi Category
            mozilla::unicode::XIDMOD_NOT_CHARS,      // Xidmod
            -1                                       // Numeric Value
        };
        return undefined;
    }

    return sCharProp2Values[sCharProp2Pages[planeSlot][pageInPlane]]
                           [offsetInPage];
}
namespace mozilla {
namespace unicode {
@ -113,81 +159,7 @@ nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[] = {
PRUint32
GetMirroredChar(PRUint32 aCh)
{
// all mirrored chars are in plane 0
if (aCh < UNICODE_BMP_LIMIT) {
int v = sMirrorValues[sMirrorPages[0][aCh >> kMirrorCharBits]]
[aCh & ((1 << kMirrorCharBits) - 1)];
// The mirror value is stored as either an offset (if less than
// kSmallMirrorOffset) from the input character code, or as
// an index into the sDistantMirrors list. This allows the
// mirrored codes to be stored as 8-bit values, as most of them
// are references to nearby character codes.
if (v < kSmallMirrorOffset) {
return aCh + v;
}
return sDistantMirrors[v - kSmallMirrorOffset];
}
return aCh;
}
PRUint8
GetCombiningClass(PRUint32 aCh)
{
if (aCh < UNICODE_BMP_LIMIT) {
return sCClassValues[sCClassPages[0][aCh >> kCClassCharBits]]
[aCh & ((1 << kCClassCharBits) - 1)];
}
if (aCh < (kCClassMaxPlane + 1) * 0x10000) {
return sCClassValues[sCClassPages[sCClassPlanes[(aCh >> 16) - 1]]
[(aCh & 0xffff) >> kCClassCharBits]]
[aCh & ((1 << kCClassCharBits) - 1)];
}
return 0;
}
PRUint8
GetGeneralCategory(PRUint32 aCh)
{
if (aCh < UNICODE_BMP_LIMIT) {
return sCatEAWValues[sCatEAWPages[0][aCh >> kCatEAWCharBits]]
[aCh & ((1 << kCatEAWCharBits) - 1)].mCategory;
}
if (aCh < (kCatEAWMaxPlane + 1) * 0x10000) {
return sCatEAWValues[sCatEAWPages[sCatEAWPlanes[(aCh >> 16) - 1]]
[(aCh & 0xffff) >> kCatEAWCharBits]]
[aCh & ((1 << kCatEAWCharBits) - 1)].mCategory;
}
return PRUint8(HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED);
}
PRUint8
GetEastAsianWidth(PRUint32 aCh)
{
if (aCh < UNICODE_BMP_LIMIT) {
return sCatEAWValues[sCatEAWPages[0][aCh >> kCatEAWCharBits]]
[aCh & ((1 << kCatEAWCharBits) - 1)].mEAW;
}
if (aCh < (kCatEAWMaxPlane + 1) * 0x10000) {
return sCatEAWValues[sCatEAWPages[sCatEAWPlanes[(aCh >> 16) - 1]]
[(aCh & 0xffff) >> kCatEAWCharBits]]
[aCh & ((1 << kCatEAWCharBits) - 1)].mEAW;
}
return 0;
}
PRInt32
GetScriptCode(PRUint32 aCh)
{
if (aCh < UNICODE_BMP_LIMIT) {
return sScriptValues[sScriptPages[0][aCh >> kScriptCharBits]]
[aCh & ((1 << kScriptCharBits) - 1)];
}
if (aCh < (kScriptMaxPlane + 1) * 0x10000) {
return sScriptValues[sScriptPages[sScriptPlanes[(aCh >> 16) - 1]]
[(aCh & 0xffff) >> kScriptCharBits]]
[aCh & ((1 << kScriptCharBits) - 1)];
}
return MOZ_SCRIPT_UNKNOWN;
return aCh + sMirrorOffsets[GetCharProps1(aCh).mMirrorOffsetIndex];
}
PRUint32
@ -200,17 +172,6 @@ GetScriptTagForCode(PRInt32 aScriptCode)
return sScriptCodeToTag[aScriptCode];
}
HSType
GetHangulSyllableType(PRUint32 aCh)
{
// all Hangul chars are in plane 0
if (aCh < UNICODE_BMP_LIMIT) {
return HSType(sHangulValues[sHangulPages[0][aCh >> kHangulCharBits]]
[aCh & ((1 << kHangulCharBits) - 1)]);
}
return HST_NONE;
}
static inline PRUint32
GetCaseMapValue(PRUint32 aCh)
{
@ -275,6 +236,25 @@ GetTitlecaseForAll(PRUint32 aCh)
return aCh;
}
// Return the Han-variant classification of aCh from the generated
// sHanVariant* tables.
HanVariantType
GetHanVariant(PRUint32 aCh)
{
    // Each PRUint8 entry of sHanVariantValues packs the 2-bit values of four
    // successive characters, with the lowest character in the least
    // significant bits.
    const PRUint32 offsetInPage = aCh & ((1 << kHanVariantCharBits) - 1);
    const PRUint32 pageInPlane = (aCh & 0xffff) >> kHanVariantCharBits;

    // Stays 0 for code points above the planes covered by the tables.
    PRUint8 packed = 0;
    if (aCh < UNICODE_BMP_LIMIT) {
        packed = sHanVariantValues[sHanVariantPages[0][pageInPlane]]
                                  [offsetInPage >> 2];
    } else if (aCh < (kHanVariantMaxPlane + 1) * 0x10000) {
        const PRUint32 planeSlot = sHanVariantPlanes[(aCh >> 16) - 1];
        packed = sHanVariantValues[sHanVariantPages[planeSlot][pageInPlane]]
                                  [offsetInPage >> 2];
    }

    // Pick out the 2-bit field belonging to this character.
    const PRUint32 shift = (aCh & 3) * 2;
    return HanVariantType((packed >> shift) & 3);
}
bool
IsClusterExtender(PRUint32 aCh, PRUint8 aCategory)
{

Просмотреть файл

@ -39,7 +39,12 @@
#define NS_UNICODEPROPERTIES_H
#include "prtypes.h"
#include "nsBidiUtils.h"
#include "nsIUGenCategory.h"
#include "nsUnicodeScriptCodes.h"
nsCharProps1 GetCharProps1(PRUint32 aCh);
nsCharProps2 GetCharProps2(PRUint32 aCh);
namespace mozilla {
@ -49,38 +54,93 @@ extern nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[];
PRUint32 GetMirroredChar(PRUint32 aCh);
PRUint8 GetCombiningClass(PRUint32 aCh);
inline PRUint8 GetCombiningClass(PRUint32 aCh) {
return GetCharProps1(aCh).mCombiningClass;
}
// returns the detailed General Category in terms of HB_UNICODE_* values
PRUint8 GetGeneralCategory(PRUint32 aCh);
inline PRUint8 GetGeneralCategory(PRUint32 aCh) {
return GetCharProps2(aCh).mCategory;
}
// returns the simplified Gen Category as defined in nsIUGenCategory
inline nsIUGenCategory::nsUGenCategory GetGenCategory(PRUint32 aCh) {
return sDetailedToGeneralCategory[GetGeneralCategory(aCh)];
}
PRUint8 GetEastAsianWidth(PRUint32 aCh);
inline PRUint8 GetEastAsianWidth(PRUint32 aCh) {
return GetCharProps2(aCh).mEAW;
}
PRInt32 GetScriptCode(PRUint32 aCh);
inline PRUint8 GetScriptCode(PRUint32 aCh) {
return GetCharProps2(aCh).mScriptCode;
}
PRUint32 GetScriptTagForCode(PRInt32 aScriptCode);
// Bidi category of aCh, read from the mBidiCategory field of its
// nsCharProps2 record and converted to nsCharType.
inline nsCharType GetBidiCat(PRUint32 aCh) {
    const nsCharProps2 props = GetCharProps2(aCh);
    return nsCharType(props.mBidiCategory);
}
// Identifier-modification classes stored in nsCharProps2::mXidmod. The
// numeric values are the codes the data generator assigns to the
// corresponding names ('inclusion' = 0 ... 'not-chars' = 9).
enum XidmodType {
    XIDMOD_INCLUSION         = 0,
    XIDMOD_RECOMMENDED       = 1,
    XIDMOD_DEFAULT_IGNORABLE = 2,
    XIDMOD_HISTORIC          = 3,
    XIDMOD_LIMITED_USE       = 4,
    XIDMOD_NOT_NFKC          = 5,
    XIDMOD_NOT_XID           = 6,
    XIDMOD_OBSOLETE          = 7,
    XIDMOD_TECHNICAL         = 8,
    XIDMOD_NOT_CHARS         = 9
};
// Identifier-modification class of aCh, read from the mXidmod field of its
// nsCharProps2 record.
inline XidmodType GetIdentifierModification(PRUint32 aCh) {
    const nsCharProps2 props = GetCharProps2(aCh);
    return XidmodType(props.mXidmod);
}
// True when the identifier-modification class of aCh is anything other than
// XIDMOD_INCLUSION or XIDMOD_RECOMMENDED, i.e. it sorts after
// XIDMOD_RECOMMENDED in the XidmodType enumeration.
inline bool IsRestrictedForIdentifiers(PRUint32 aCh) {
    return GetIdentifierModification(aCh) > XIDMOD_RECOMMENDED;
}
/**
 * Return the numeric value recorded for the character in the mNumericValue
 * field of its nsCharProps2 record. This is the value the data generator
 * read from field 7 of the character's UCD record, or -1 if that field was
 * empty.
 * To restrict to decimal digits, the caller should also check whether
 * GetGeneralCategory returns HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER.
 */
inline PRInt8 GetNumericValue(PRUint32 aCh) {
    const nsCharProps2 props = GetCharProps2(aCh);
    return props.mNumericValue;
}
// Two-bit Han-variant classification returned by GetHanVariant.
enum HanVariantType {
    HVT_NotHan          = 0,
    HVT_SimplifiedOnly  = 1,
    HVT_TraditionalOnly = 2,
    HVT_AnyHan          = 3
};
HanVariantType GetHanVariant(PRUint32 aCh);
bool IsClusterExtender(PRUint32 aCh, PRUint8 aCategory);
inline bool IsClusterExtender(PRUint32 aCh) {
return IsClusterExtender(aCh, GetGeneralCategory(aCh));
return IsClusterExtender(aCh, GetGeneralCategory(aCh));
}
enum HSType {
HST_NONE = 0x00,
HST_L = 0x01,
HST_V = 0x02,
HST_T = 0x04,
HST_LV = 0x03,
HST_LVT = 0x07
HST_NONE = 0x00,
HST_L = 0x01,
HST_V = 0x02,
HST_T = 0x04,
HST_LV = 0x03,
HST_LVT = 0x07
};
HSType GetHangulSyllableType(PRUint32 aCh);
inline HSType GetHangulSyllableType(PRUint32 aCh) {
return HSType(GetCharProps1(aCh).mHangulType);
}
// Case mappings for the full Unicode range;
// note that it may be worth testing for ASCII chars and taking
@ -91,13 +151,13 @@ PRUint32 GetTitlecaseForLower(PRUint32 aCh); // maps LC to titlecase, UC unchang
PRUint32 GetTitlecaseForAll(PRUint32 aCh); // maps both UC and LC to titlecase
enum ShapingType {
SHAPING_DEFAULT = 0x0001,
SHAPING_ARABIC = 0x0002,
SHAPING_HEBREW = 0x0004,
SHAPING_HANGUL = 0x0008,
SHAPING_MONGOLIAN = 0x0010,
SHAPING_INDIC = 0x0020,
SHAPING_THAI = 0x0040
SHAPING_DEFAULT = 0x0001,
SHAPING_ARABIC = 0x0002,
SHAPING_HEBREW = 0x0004,
SHAPING_HANGUL = 0x0008,
SHAPING_MONGOLIAN = 0x0010,
SHAPING_INDIC = 0x0020,
SHAPING_THAI = 0x0040
};
PRInt32 ScriptShapingType(PRInt32 aScriptCode);

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -41,7 +41,7 @@
* ***** END LICENSE BLOCK ***** */
/*
* Created on Mon Apr 23 14:51:01 2012 from UCD data files with version info:
* Created on Mon Apr 23 20:03:29 2012 from UCD data files with version info:
*
# Date: 2012-01-26, 22:03:00 GMT [KW]
@ -70,12 +70,36 @@ for the Unicode Character Database (UCD) for Unicode 6.1.0.
# HangulSyllableType-6.1.0.txt
# Date: 2011-08-25, 00:02:18 GMT [MD]
# File: xidmodifications.txt
# Version: 2.1
# Generated: 2010-04-13, 01:33:09 GMT
#
# Unihan_Variants.txt
# Date: 2011-08-08 22:10:53 GMT [JHJ]
*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/
#ifndef NS_UNICODE_SCRIPT_CODES
#define NS_UNICODE_SCRIPT_CODES
struct nsCharProps1 {
unsigned char mMirrorOffsetIndex:5;
unsigned char mHangulType:3;
unsigned char mCombiningClass:8;
};
struct nsCharProps2 {
unsigned char mScriptCode:8;
unsigned char mEAW:3;
unsigned char mCategory:5;
unsigned char mBidiCategory:5;
unsigned char mXidmod:4;
signed char mNumericValue:5;
unsigned char mHanVariant:2;
};
enum {
MOZ_SCRIPT_COMMON = 0,
MOZ_SCRIPT_INHERITED = 1,