зеркало из https://github.com/mozilla/gecko-dev.git
Bug 724538 - When ICU is available in the build, replace most of nsCharProps2 fields with ICU property accessors. r=emk
This commit is contained in:
Родитель
534e2d442e
Коммит
06f42574aa
|
@ -1625,9 +1625,14 @@ gfxPangoFontGroup::FindFontForChar(uint32_t aCh, uint32_t aPrevCh,
|
|||
nextFont = 1;
|
||||
}
|
||||
|
||||
// Pango, GLib, and Thebes (but not harfbuzz!) all happen to use the same
|
||||
// script codes, so we can just cast the value here.
|
||||
const PangoScript script = static_cast<PangoScript>(aRunScript);
|
||||
// Our MOZ_SCRIPT_* codes may not match the PangoScript enumeration values
|
||||
// (if we're using ICU's codes), so convert by mapping through ISO 15924 tag.
|
||||
// Note that PangoScript is defined to be compatible with GUnicodeScript:
|
||||
// https://developer.gnome.org/pango/stable/pango-Scripts-and-Languages.html#PangoScript
|
||||
const hb_tag_t scriptTag = GetScriptTagForCode(aRunScript);
|
||||
const PangoScript script =
|
||||
(const PangoScript)g_unicode_script_from_iso15924(scriptTag);
|
||||
|
||||
// Might be nice to call pango_language_includes_script only once for the
|
||||
// run rather than for each character.
|
||||
PangoLanguage *scriptLang;
|
||||
|
@ -1654,19 +1659,6 @@ gfxPangoFontGroup::FindFontForChar(uint32_t aCh, uint32_t aPrevCh,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// Sanity-check: spot-check a few constants to confirm that Thebes and
|
||||
// Pango script codes really do match
|
||||
#define CHECK_SCRIPT_CODE(script) \
|
||||
PR_STATIC_ASSERT(int32_t(MOZ_SCRIPT_##script) == \
|
||||
int32_t(PANGO_SCRIPT_##script))
|
||||
|
||||
CHECK_SCRIPT_CODE(COMMON);
|
||||
CHECK_SCRIPT_CODE(INHERITED);
|
||||
CHECK_SCRIPT_CODE(ARABIC);
|
||||
CHECK_SCRIPT_CODE(LATIN);
|
||||
CHECK_SCRIPT_CODE(UNKNOWN);
|
||||
CHECK_SCRIPT_CODE(NKO);
|
||||
|
||||
/**
|
||||
** gfxFcFont
|
||||
**/
|
||||
|
|
|
@ -158,19 +158,11 @@ gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
|
|||
}
|
||||
}
|
||||
|
||||
// Get the nsCharProps2 record for the current character,
|
||||
// so we can read the script and (if needed) the gen category
|
||||
// without needing to do two multi-level lookups.
|
||||
// NOTE that this means we're relying on an implementation detail
|
||||
// of the nsUnicodeProperties tables, and might have to revise this
|
||||
// if the nsCharProps records used there are modified in future.
|
||||
const nsCharProps2& charProps = GetCharProps2(ch);
|
||||
|
||||
// Initialize gc to UNASSIGNED; we'll only set it to the true GC
|
||||
// if the character has script=COMMON, otherwise we don't care.
|
||||
uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
||||
|
||||
sc = charProps.mScriptCode;
|
||||
sc = GetScriptCode(ch);
|
||||
if (sc == MOZ_SCRIPT_COMMON) {
|
||||
/*
|
||||
* Paired character handling:
|
||||
|
@ -183,7 +175,7 @@ gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
|
|||
* We only do this if the script is COMMON; for chars with
|
||||
* specific script assignments, we just use them as-is.
|
||||
*/
|
||||
gc = charProps.mCategory;
|
||||
GetGeneralCategory(ch);
|
||||
if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
|
||||
uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
|
||||
if (endPairChar != ch) {
|
||||
|
|
|
@ -40,8 +40,9 @@
|
|||
#
|
||||
# (2) Run this tool using a command line of the form
|
||||
#
|
||||
# perl genUnicodePropertyData.pl \
|
||||
# /path/to/harfbuzz/src \
|
||||
# perl genUnicodePropertyData.pl \
|
||||
# /path/to/harfbuzz/src \
|
||||
# /path/to/icu/common/unicode \
|
||||
# /path/to/UCD-directory
|
||||
#
|
||||
# This will generate (or overwrite!) the files
|
||||
|
@ -54,15 +55,17 @@
|
|||
use strict;
|
||||
use List::Util qw(first);
|
||||
|
||||
if ($#ARGV != 1) {
|
||||
if ($#ARGV != 2) {
|
||||
print <<__EOT;
|
||||
# Run this tool using a command line of the form
|
||||
#
|
||||
# perl genUnicodePropertyData.pl \\
|
||||
# /path/to/harfbuzz/src \\
|
||||
# perl genUnicodePropertyData.pl \\
|
||||
# /path/to/harfbuzz/src \\
|
||||
# /path/to/icu/common/unicode \\
|
||||
# /path/to/UCD-directory
|
||||
#
|
||||
# where harfbuzz/src is the directory containing harfbuzz .cc and .hh files,
|
||||
# icu/common/unicode is the directory containing ICU 'common' public headers,
|
||||
# and UCD-directory is a directory containing the current Unicode Character
|
||||
# Database files (UnicodeData.txt, etc), available from
|
||||
# http://www.unicode.org/Public/UNIDATA/, with additional resources as
|
||||
|
@ -78,190 +81,20 @@ __EOT
|
|||
exit 0;
|
||||
}
|
||||
|
||||
# load HB_Script and HB_Category constants
|
||||
my $HARFBUZZ = $ARGV[0];
|
||||
my $ICU = $ARGV[1];
|
||||
my $UNICODE = $ARGV[2];
|
||||
|
||||
# NOTE that HB_SCRIPT_* constants are now "tag" values, NOT sequentially-allocated
|
||||
# script codes as used by Glib/Pango/etc.
|
||||
# We therefore define a set of MOZ_SCRIPT_* constants that are script _codes_
|
||||
# compatible with those libraries, and map these to HB_SCRIPT_* _tags_ as needed.
|
||||
# load HB_Category constants
|
||||
|
||||
# CHECK that this matches Pango source (as found for example at
|
||||
# http://git.gnome.org/browse/pango/tree/pango/pango-script.h)
|
||||
# for as many codes as that defines (currently up through Unicode 5.1)
|
||||
# and the GLib enumeration
|
||||
# http://developer.gnome.org/glib/2.30/glib-Unicode-Manipulation.html#GUnicodeScript
|
||||
# (currently defined up through Unicode 6.0).
|
||||
# Constants beyond these may be regarded as unstable for now, but we don't actually
|
||||
# depend on the specific values.
|
||||
my %scriptCode = (
|
||||
INVALID => -1,
|
||||
COMMON => 0,
|
||||
INHERITED => 1,
|
||||
ARABIC => 2,
|
||||
ARMENIAN => 3,
|
||||
BENGALI => 4,
|
||||
BOPOMOFO => 5,
|
||||
CHEROKEE => 6,
|
||||
COPTIC => 7,
|
||||
CYRILLIC => 8,
|
||||
DESERET => 9,
|
||||
DEVANAGARI => 10,
|
||||
ETHIOPIC => 11,
|
||||
GEORGIAN => 12,
|
||||
GOTHIC => 13,
|
||||
GREEK => 14,
|
||||
GUJARATI => 15,
|
||||
GURMUKHI => 16,
|
||||
HAN => 17,
|
||||
HANGUL => 18,
|
||||
HEBREW => 19,
|
||||
HIRAGANA => 20,
|
||||
KANNADA => 21,
|
||||
KATAKANA => 22,
|
||||
KHMER => 23,
|
||||
LAO => 24,
|
||||
LATIN => 25,
|
||||
MALAYALAM => 26,
|
||||
MONGOLIAN => 27,
|
||||
MYANMAR => 28,
|
||||
OGHAM => 29,
|
||||
OLD_ITALIC => 30,
|
||||
ORIYA => 31,
|
||||
RUNIC => 32,
|
||||
SINHALA => 33,
|
||||
SYRIAC => 34,
|
||||
TAMIL => 35,
|
||||
TELUGU => 36,
|
||||
THAANA => 37,
|
||||
THAI => 38,
|
||||
TIBETAN => 39,
|
||||
CANADIAN_ABORIGINAL => 40,
|
||||
YI => 41,
|
||||
TAGALOG => 42,
|
||||
HANUNOO => 43,
|
||||
BUHID => 44,
|
||||
TAGBANWA => 45,
|
||||
# unicode 4.0 additions
|
||||
BRAILLE => 46,
|
||||
CYPRIOT => 47,
|
||||
LIMBU => 48,
|
||||
OSMANYA => 49,
|
||||
SHAVIAN => 50,
|
||||
LINEAR_B => 51,
|
||||
TAI_LE => 52,
|
||||
UGARITIC => 53,
|
||||
# unicode 4.1 additions
|
||||
NEW_TAI_LUE => 54,
|
||||
BUGINESE => 55,
|
||||
GLAGOLITIC => 56,
|
||||
TIFINAGH => 57,
|
||||
SYLOTI_NAGRI => 58,
|
||||
OLD_PERSIAN => 59,
|
||||
KHAROSHTHI => 60,
|
||||
# unicode 5.0 additions
|
||||
UNKNOWN => 61,
|
||||
BALINESE => 62,
|
||||
CUNEIFORM => 63,
|
||||
PHOENICIAN => 64,
|
||||
PHAGS_PA => 65,
|
||||
NKO => 66,
|
||||
# unicode 5.1 additions
|
||||
KAYAH_LI => 67,
|
||||
LEPCHA => 68,
|
||||
REJANG => 69,
|
||||
SUNDANESE => 70,
|
||||
SAURASHTRA => 71,
|
||||
CHAM => 72,
|
||||
OL_CHIKI => 73,
|
||||
VAI => 74,
|
||||
CARIAN => 75,
|
||||
LYCIAN => 76,
|
||||
LYDIAN => 77,
|
||||
# unicode 5.2 additions
|
||||
AVESTAN => 78,
|
||||
BAMUM => 79,
|
||||
EGYPTIAN_HIEROGLYPHS => 80,
|
||||
IMPERIAL_ARAMAIC => 81,
|
||||
INSCRIPTIONAL_PAHLAVI => 82,
|
||||
INSCRIPTIONAL_PARTHIAN => 83,
|
||||
JAVANESE => 84,
|
||||
KAITHI => 85,
|
||||
LISU => 86,
|
||||
MEETEI_MAYEK => 87,
|
||||
OLD_SOUTH_ARABIAN => 88,
|
||||
OLD_TURKIC => 89,
|
||||
SAMARITAN => 90,
|
||||
TAI_THAM => 91,
|
||||
TAI_VIET => 92,
|
||||
# unicode 6.0 additions
|
||||
BATAK => 93,
|
||||
BRAHMI => 94,
|
||||
MANDAIC => 95,
|
||||
# unicode 6.1 additions
|
||||
CHAKMA => 96,
|
||||
MEROITIC_CURSIVE => 97,
|
||||
MEROITIC_HIEROGLYPHS => 98,
|
||||
MIAO => 99,
|
||||
SHARADA => 100,
|
||||
SORA_SOMPENG => 101,
|
||||
TAKRI => 102,
|
||||
# unicode 7.0 additions
|
||||
BASSA_VAH => 103,
|
||||
CAUCASIAN_ALBANIAN => 104,
|
||||
DUPLOYAN => 105,
|
||||
ELBASAN => 106,
|
||||
GRANTHA => 107,
|
||||
KHOJKI => 108,
|
||||
KHUDAWADI => 109,
|
||||
LINEAR_A => 110,
|
||||
MAHAJANI => 111,
|
||||
MANICHAEAN => 112,
|
||||
MENDE_KIKAKUI => 113,
|
||||
MODI => 114,
|
||||
MRO => 115,
|
||||
NABATAEAN => 116,
|
||||
OLD_NORTH_ARABIAN => 117,
|
||||
OLD_PERMIC => 118,
|
||||
PAHAWH_HMONG => 119,
|
||||
PALMYRENE => 120,
|
||||
PAU_CIN_HAU => 121,
|
||||
PSALTER_PAHLAVI => 122,
|
||||
SIDDHAM => 123,
|
||||
TIRHUTA => 124,
|
||||
WARANG_CITI => 125,
|
||||
# unicode 8.0 additions
|
||||
AHOM => 126,
|
||||
ANATOLIAN_HIEROGLYPHS => 127,
|
||||
HATRAN => 128,
|
||||
MULTANI => 129,
|
||||
OLD_HUNGARIAN => 130,
|
||||
SIGNWRITING => 131,
|
||||
|
||||
# additional "script" code, not from Unicode (but matches ISO 15924's Zmth tag)
|
||||
MATHEMATICAL_NOTATION => 132,
|
||||
);
|
||||
|
||||
my $sc = -1;
|
||||
my $cc = -1;
|
||||
my %catCode;
|
||||
my @scriptCodeToTag;
|
||||
my @scriptCodeToName;
|
||||
|
||||
sub readHarfBuzzHeader
|
||||
{
|
||||
my $file = shift;
|
||||
open FH, "< $ARGV[0]/$file" or die "can't open harfbuzz header $ARGV[0]/$file\n";
|
||||
open FH, "< $HARFBUZZ/$file" or die "can't open harfbuzz header $HARFBUZZ/$file\n";
|
||||
while (<FH>) {
|
||||
s/CANADIAN_SYLLABICS/CANADIAN_ABORIGINAL/; # harfbuzz and unicode disagree on this name :(
|
||||
if (m/HB_SCRIPT_([A-Z_]+)\s*=\s*HB_TAG\s*\(('.','.','.','.')\)\s*,/) {
|
||||
unless (exists $scriptCode{$1}) {
|
||||
warn "unknown script name $1 found in $file\n";
|
||||
next;
|
||||
}
|
||||
$sc = $scriptCode{$1};
|
||||
$scriptCodeToTag[$sc] = $2;
|
||||
$scriptCodeToName[$sc] = $1;
|
||||
}
|
||||
if (m/HB_UNICODE_GENERAL_CATEGORY_([A-Z_]+)/) {
|
||||
$cc++;
|
||||
$catCode{$1} = $cc;
|
||||
|
@ -270,16 +103,40 @@ sub readHarfBuzzHeader
|
|||
close FH;
|
||||
}
|
||||
|
||||
&readHarfBuzzHeader("hb-common.h");
|
||||
&readHarfBuzzHeader("hb-unicode.h");
|
||||
|
||||
die "didn't find HarfBuzz script codes\n" if $sc == -1;
|
||||
die "didn't find HarfBuzz category codes\n" if $cc == -1;
|
||||
|
||||
# Additional code not present in HarfBuzz headers:
|
||||
$sc = $scriptCode{"MATHEMATICAL_NOTATION"};
|
||||
$scriptCodeToTag[$sc] = "'Z','m','t','h'";
|
||||
$scriptCodeToName[$sc] = "MATHEMATICAL_NOTATION";
|
||||
my %scriptCode;
|
||||
my @scriptCodeToTag;
|
||||
my @scriptCodeToName;
|
||||
|
||||
my $sc = -1;
|
||||
|
||||
# Parse an ICU public header (called below with "uscript.h") and record every
# USCRIPT_* constant whose line carries a numeric value and a four-letter tag
# comment.
#
# Argument: header file name, relative to the $ICU directory.
# Fills the file-level %scriptCode (name -> code), @scriptCodeToTag
# (code -> tag) and @scriptCodeToName (code -> name), and leaves the last
# code seen in $sc. Dies if the header cannot be opened.
sub readIcuHeader
{
    my $file = shift;
    # Three-argument open with a lexical handle: the path is never scanned for
    # a mode prefix, and the handle cannot collide with the bareword FH that
    # the rest of this script reuses.
    open my $fh, '<', "$ICU/$file" or die "can't open ICU header $ICU/$file\n";
    while (my $line = <$fh>) {
        # adjust for ICU vs UCD naming discrepancies
        $line =~ s/LANNA/TAI_THAM/;
        $line =~ s/MEITEI_MAYEK/MEETEI_MAYEK/;
        $line =~ s/ORKHON/OLD_TURKIC/;
        # Leave names that already carry the suffix alone, so the rename can
        # never yield MENDE_KIKAKUI_KIKAKUI.
        $line =~ s/MENDE(?!_KIKAKUI)/MENDE_KIKAKUI/;
        $line =~ s/SIGN_WRITING/SIGNWRITING/;
        if ($line =~ m|USCRIPT_([A-Z_]+)\s*=\s*([0-9]+),\s*/\*\s*([A-Z][a-z]{3})\s*\*/|) {
            $sc = $2;
            $scriptCode{$1} = $sc;
            $scriptCodeToTag[$sc] = $3;
            $scriptCodeToName[$sc] = $1;
        }
    }
    close $fh;
}
|
||||
|
||||
&readIcuHeader("uscript.h");
|
||||
|
||||
die "didn't find ICU script codes\n" if $sc == -1;
|
||||
|
||||
my %xidmodCode = (
|
||||
'Recommended' => 0,
|
||||
|
@ -317,9 +174,9 @@ my %bidicategoryCode = (
|
|||
"PDF" => "16", # Pop Directional Format
|
||||
"NSM" => "17", # Non-Spacing Mark
|
||||
"BN" => "18", # Boundary Neutral
|
||||
"LRI" => "19", # Left-to-Right Isolate
|
||||
"RLI" => "20", # Right-to-left Isolate
|
||||
"FSI" => "21", # First Strong Isolate
|
||||
"FSI" => "19", # First Strong Isolate
|
||||
"LRI" => "20", # Left-to-Right Isolate
|
||||
"RLI" => "21", # Right-to-left Isolate
|
||||
"PDI" => "22" # Pop Directional Isolate
|
||||
);
|
||||
|
||||
|
@ -404,7 +261,7 @@ my %ucd2hb = (
|
|||
|
||||
# read ReadMe.txt
|
||||
my @versionInfo;
|
||||
open FH, "< $ARGV[1]/ReadMe.txt" or die "can't open Unicode ReadMe.txt file\n";
|
||||
open FH, "< $UNICODE/ReadMe.txt" or die "can't open Unicode ReadMe.txt file\n";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
push @versionInfo, $_;
|
||||
|
@ -418,7 +275,7 @@ my $kLowerToUpper = 0x10000000;
|
|||
my $kCaseMapCharMask = 0x001fffff;
|
||||
|
||||
# read UnicodeData.txt
|
||||
open FH, "< $ARGV[1]/UnicodeData.txt" or die "can't open UCD file UnicodeData.txt\n";
|
||||
open FH, "< $UNICODE/UnicodeData.txt" or die "can't open UCD file UnicodeData.txt\n";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
my @fields = split /;/;
|
||||
|
@ -490,7 +347,7 @@ while (<FH>) {
|
|||
close FH;
|
||||
|
||||
# read Scripts.txt
|
||||
open FH, "< $ARGV[1]/Scripts.txt" or die "can't open UCD file Scripts.txt\n";
|
||||
open FH, "< $UNICODE/Scripts.txt" or die "can't open UCD file Scripts.txt\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
|
@ -500,8 +357,8 @@ while (<FH>) {
|
|||
while (<FH>) {
|
||||
if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+([^ ]+)/) {
|
||||
my $script = uc($3);
|
||||
warn "unknown script $script" unless exists $scriptCode{$script};
|
||||
$script = $scriptCode{$script};
|
||||
warn "unknown ICU script $script" unless exists $scriptCode{$script};
|
||||
# Replace the script name with its numeric code, reusing the $script declared
# above; a second "my" in the same scope would only shadow that variable.
$script = $scriptCode{$script};
|
||||
my $start = hex "0x$1";
|
||||
my $end = (defined $2) ? hex "0x$2" : $start;
|
||||
for (my $i = $start; $i <= $end; ++$i) {
|
||||
|
@ -515,7 +372,7 @@ close FH;
|
|||
my @offsets = ();
|
||||
push @offsets, 0;
|
||||
|
||||
open FH, "< $ARGV[1]/BidiMirroring.txt" or die "can't open UCD file BidiMirroring.txt\n";
|
||||
open FH, "< $UNICODE/BidiMirroring.txt" or die "can't open UCD file BidiMirroring.txt\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
|
@ -543,7 +400,7 @@ my %pairedBracketTypeCode = (
|
|||
'O' => 1,
|
||||
'C' => 2
|
||||
);
|
||||
open FH, "< $ARGV[1]/BidiBrackets.txt" or die "can't open UCD file BidiBrackets.txt\n";
|
||||
open FH, "< $UNICODE/BidiBrackets.txt" or die "can't open UCD file BidiBrackets.txt\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
|
@ -570,7 +427,7 @@ my %hangulType = (
|
|||
'LV' => 0x03,
|
||||
'LVT' => 0x07
|
||||
);
|
||||
open FH, "< $ARGV[1]/HangulSyllableType.txt" or die "can't open UCD file HangulSyllableType.txt\n";
|
||||
open FH, "< $UNICODE/HangulSyllableType.txt" or die "can't open UCD file HangulSyllableType.txt\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
|
@ -593,7 +450,7 @@ while (<FH>) {
|
|||
close FH;
|
||||
|
||||
# read xidmodifications.txt
|
||||
open FH, "< $ARGV[1]/security/xidmodifications.txt" or die "can't open UCD file xidmodifications.txt\n";
|
||||
open FH, "< $UNICODE/security/xidmodifications.txt" or die "can't open UCD file xidmodifications.txt\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
|
@ -616,7 +473,7 @@ while (<FH>) {
|
|||
}
|
||||
close FH;
|
||||
|
||||
open FH, "< $ARGV[1]/Unihan_Variants.txt" or die "can't open UCD file Unihan_Variants.txt (from Unihan.zip)\n";
|
||||
open FH, "< $UNICODE/Unihan_Variants.txt" or die "can't open UCD file Unihan_Variants.txt (from Unihan.zip)\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
|
@ -653,7 +510,7 @@ while (<FH>) {
|
|||
close FH;
|
||||
|
||||
# read VerticalOrientation-13.txt
|
||||
open FH, "< $ARGV[1]/vertical/VerticalOrientation-13.txt" or die "can't open UTR50 data file VerticalOrientation-13.txt\n";
|
||||
open FH, "< $UNICODE/vertical/VerticalOrientation-13.txt" or die "can't open UTR50 data file VerticalOrientation-13.txt\n";
|
||||
push @versionInfo, "";
|
||||
while (<FH>) {
|
||||
chomp;
|
||||
|
@ -732,21 +589,25 @@ $versionInfo
|
|||
|
||||
__END
|
||||
|
||||
print DATA_TABLES "#if !ENABLE_INTL_API\n";
|
||||
print DATA_TABLES "static const uint32_t sScriptCodeToTag[] = {\n";
|
||||
for (my $i = 0; $i < scalar @scriptCodeToTag; ++$i) {
|
||||
printf DATA_TABLES " HB_TAG(%s)", $scriptCodeToTag[$i];
|
||||
printf DATA_TABLES " HB_TAG('%c','%c','%c','%c')", unpack('cccc', $scriptCodeToTag[$i]);
|
||||
print DATA_TABLES $i < $#scriptCodeToTag ? ",\n" : "\n";
|
||||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
print DATA_TABLES "};\n";
|
||||
print DATA_TABLES "#endif\n\n";
|
||||
|
||||
our $totalData = 0;
|
||||
|
||||
print DATA_TABLES "#if !ENABLE_INTL_API\n";
|
||||
print DATA_TABLES "static const int16_t sMirrorOffsets[] = {\n";
|
||||
for (my $i = 0; $i < scalar @offsets; ++$i) {
|
||||
printf DATA_TABLES " $offsets[$i]";
|
||||
print DATA_TABLES $i < $#offsets ? ",\n" : "\n";
|
||||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
print DATA_TABLES "};\n";
|
||||
print DATA_TABLES "#endif\n\n";
|
||||
|
||||
print HEADER "#pragma pack(1)\n\n";
|
||||
|
||||
|
@ -762,11 +623,26 @@ struct nsCharProps1 {
|
|||
unsigned char mCombiningClass:8;
|
||||
};
|
||||
/;
|
||||
print DATA_TABLES "#ifndef ENABLE_INTL_API\n";
|
||||
&genTables("CharProp1", $type, "nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1);
|
||||
print DATA_TABLES "#endif\n\n";
|
||||
&genTables("#if !ENABLE_INTL_API", "#endif",
|
||||
"CharProp1", $type, "nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1);
|
||||
|
||||
sub sprintCharProps2
|
||||
sub sprintCharProps2_short
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("{%d,%d,%d},",
|
||||
$pairedBracketType[$usv], $verticalOrientation[$usv], $xidmod[$usv]);
|
||||
}
|
||||
$type = q/
|
||||
struct nsCharProps2 {
|
||||
unsigned char mPairedBracketType:2;
|
||||
unsigned char mVertOrient:2;
|
||||
unsigned char mXidmod:4;
|
||||
};
|
||||
/;
|
||||
&genTables("#if ENABLE_INTL_API", "#endif",
|
||||
"CharProp2", $type, "nsCharProps2", 9, 7, \&sprintCharProps2_short, 16, 1, 1);
|
||||
|
||||
sub sprintCharProps2_full
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("{%d,%d,%d,%d,%d,%d,%d},",
|
||||
|
@ -785,7 +661,8 @@ struct nsCharProps2 {
|
|||
unsigned char mVertOrient:2;
|
||||
};
|
||||
|;
|
||||
&genTables("CharProp2", $type, "nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1);
|
||||
&genTables("#if !ENABLE_INTL_API", "#endif",
|
||||
"CharProp2", $type, "nsCharProps2", 11, 5, \&sprintCharProps2_full, 16, 4, 1);
|
||||
|
||||
print HEADER "#pragma pack()\n\n";
|
||||
|
||||
|
@ -800,21 +677,22 @@ sub sprintHanVariants
|
|||
}
|
||||
return sprintf("0x%02x,", $val);
|
||||
}
|
||||
&genTables("HanVariant", "", "uint8_t", 9, 7, \&sprintHanVariants, 2, 1, 4);
|
||||
## Han Variant data currently unused but may be needed in future, see bug 857481
|
||||
## &genTables("", "", "HanVariant", "", "uint8_t", 9, 7, \&sprintHanVariants, 2, 1, 4);
|
||||
|
||||
sub sprintFullWidth
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("0x%04x,", $fullWidth[$usv]);
|
||||
}
|
||||
&genTables("FullWidth", "", "uint16_t", 10, 6, \&sprintFullWidth, 0, 2, 1);
|
||||
&genTables("", "", "FullWidth", "", "uint16_t", 10, 6, \&sprintFullWidth, 0, 2, 1);
|
||||
|
||||
sub sprintCasemap
|
||||
{
|
||||
my $usv = shift;
|
||||
return sprintf("0x%08x,", $casemap[$usv]);
|
||||
}
|
||||
&genTables("CaseMap", "", "uint32_t", 11, 5, \&sprintCasemap, 1, 4, 1);
|
||||
&genTables("", "", "CaseMap", "", "uint32_t", 11, 5, \&sprintCasemap, 1, 4, 1);
|
||||
|
||||
print STDERR "Total data = $totalData\n";
|
||||
|
||||
|
@ -826,8 +704,16 @@ printf DATA_TABLES "const uint32_t kCaseMapCharMask = 0x%08x;\n\n", $kCaseMapCha
|
|||
|
||||
sub genTables
|
||||
{
|
||||
my ($prefix, $typedef, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;
|
||||
my ($guardBegin, $guardEnd,
|
||||
$prefix, $typedef, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;
|
||||
|
||||
if ($typedef ne '') {
|
||||
print HEADER "$guardBegin\n";
|
||||
print HEADER "$typedef\n";
|
||||
print HEADER "$guardEnd\n\n";
|
||||
}
|
||||
|
||||
print DATA_TABLES "\n$guardBegin\n";
|
||||
print DATA_TABLES "#define k${prefix}MaxPlane $maxPlane\n";
|
||||
print DATA_TABLES "#define k${prefix}IndexBits $indexBits\n";
|
||||
print DATA_TABLES "#define k${prefix}CharBits $charBits\n";
|
||||
|
@ -888,8 +774,6 @@ sub genTables
|
|||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
|
||||
print HEADER "$typedef\n\n" if $typedef ne '';
|
||||
|
||||
my $pageLen = $charsPerPage / $charsPerEntry;
|
||||
print DATA_TABLES "static const $type s${prefix}Values[$chCount][$pageLen] = {\n";
|
||||
for (my $i = 0; $i < scalar @char; ++$i) {
|
||||
|
@ -897,7 +781,8 @@ sub genTables
|
|||
print DATA_TABLES $char[$i];
|
||||
print DATA_TABLES $i < $#char ? "},\n" : "}\n";
|
||||
}
|
||||
print DATA_TABLES "};\n\n";
|
||||
print DATA_TABLES "};\n";
|
||||
print DATA_TABLES "$guardEnd\n";
|
||||
|
||||
my $dataSize = $pmCount * $indexLen * $pmBits/8 +
|
||||
$chCount * $pageLen * $bytesPerEntry +
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
* for the detailed definition of the following categories
|
||||
*
|
||||
* The values here must match the equivalents in %bidicategoryCode in
|
||||
* mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl
|
||||
* mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
|
||||
* and must also match the values used by ICU's UCharDirection.
|
||||
*/
|
||||
|
||||
enum nsCharType {
|
||||
|
@ -37,9 +38,9 @@ enum nsCharType {
|
|||
eCharType_PopDirectionalFormat = 16,
|
||||
eCharType_DirNonSpacingMark = 17,
|
||||
eCharType_BoundaryNeutral = 18,
|
||||
eCharType_LeftToRightIsolate = 19,
|
||||
eCharType_RightToLeftIsolate = 20,
|
||||
eCharType_FirstStrongIsolate = 21,
|
||||
eCharType_FirstStrongIsolate = 19,
|
||||
eCharType_LeftToRightIsolate = 20,
|
||||
eCharType_RightToLeftIsolate = 21,
|
||||
eCharType_PopDirectionalIsolate = 22,
|
||||
eCharType_CharTypeCount
|
||||
};
|
||||
|
|
|
@ -11,12 +11,12 @@
|
|||
|
||||
#if ENABLE_INTL_API
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uscript.h"
|
||||
#endif
|
||||
|
||||
#define UNICODE_BMP_LIMIT 0x10000
|
||||
#define UNICODE_LIMIT 0x110000
|
||||
|
||||
|
||||
#ifndef ENABLE_INTL_API
|
||||
static const nsCharProps1&
|
||||
GetCharProps1(uint32_t aCh)
|
||||
|
@ -56,14 +56,21 @@ GetCharProps2(uint32_t aCh)
|
|||
|
||||
NS_NOTREACHED("Getting CharProps for codepoint outside Unicode range");
|
||||
// Default values for unassigned
|
||||
using namespace mozilla::unicode;
|
||||
static const nsCharProps2 undefined = {
|
||||
MOZ_SCRIPT_UNKNOWN, // Script code
|
||||
0, // East Asian Width
|
||||
HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, // General Category
|
||||
eCharType_LeftToRight, // Bidi Category
|
||||
mozilla::unicode::XIDMOD_NOT_CHARS, // Xidmod
|
||||
-1, // Numeric Value
|
||||
mozilla::unicode::HVT_NotHan // Han variant
|
||||
#if ENABLE_INTL_API
|
||||
PAIRED_BRACKET_TYPE_NONE,
|
||||
VERTICAL_ORIENTATION_R,
|
||||
XIDMOD_NOT_CHARS
|
||||
#else
|
||||
MOZ_SCRIPT_UNKNOWN,
|
||||
PAIRED_BRACKET_TYPE_NONE,
|
||||
HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,
|
||||
eCharType_LeftToRight,
|
||||
XIDMOD_NOT_CHARS,
|
||||
-1, // Numeric Value
|
||||
VERTICAL_ORIENTATION_R
|
||||
#endif
|
||||
};
|
||||
return undefined;
|
||||
}
|
||||
|
@ -93,7 +100,7 @@ to provide the most compact storage, depending on the distribution
|
|||
of values.
|
||||
*/
|
||||
|
||||
nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[] = {
|
||||
const nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[] = {
|
||||
/*
|
||||
* The order here corresponds to the HB_UNICODE_GENERAL_CATEGORY_* constants
|
||||
* of the hb_unicode_general_category_t enum in gfx/harfbuzz/src/hb-unicode.h.
|
||||
|
@ -130,6 +137,69 @@ nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[] = {
|
|||
/* SPACE_SEPARATOR */ nsIUGenCategory::kSeparator
|
||||
};
|
||||
|
||||
#ifdef ENABLE_INTL_API
|
||||
const hb_unicode_general_category_t sICUtoHBcategory[U_CHAR_CATEGORY_COUNT] = {
|
||||
HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, // U_GENERAL_OTHER_TYPES = 0,
|
||||
HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, // U_UPPERCASE_LETTER = 1,
|
||||
HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, // U_LOWERCASE_LETTER = 2,
|
||||
HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, // U_TITLECASE_LETTER = 3,
|
||||
HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER, // U_MODIFIER_LETTER = 4,
|
||||
HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, // U_OTHER_LETTER = 5,
|
||||
HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, // U_NON_SPACING_MARK = 6,
|
||||
HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK, // U_ENCLOSING_MARK = 7,
|
||||
HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK, // U_COMBINING_SPACING_MARK = 8,
|
||||
HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER, // U_DECIMAL_DIGIT_NUMBER = 9,
|
||||
HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER, // U_LETTER_NUMBER = 10,
|
||||
HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER, // U_OTHER_NUMBER = 11,
|
||||
HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR, // U_SPACE_SEPARATOR = 12,
|
||||
HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR, // U_LINE_SEPARATOR = 13,
|
||||
HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR, // U_PARAGRAPH_SEPARATOR = 14,
|
||||
HB_UNICODE_GENERAL_CATEGORY_CONTROL, // U_CONTROL_CHAR = 15,
|
||||
HB_UNICODE_GENERAL_CATEGORY_FORMAT, // U_FORMAT_CHAR = 16,
|
||||
HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE, // U_PRIVATE_USE_CHAR = 17,
|
||||
HB_UNICODE_GENERAL_CATEGORY_SURROGATE, // U_SURROGATE = 18,
|
||||
HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION, // U_DASH_PUNCTUATION = 19,
|
||||
HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION, // U_START_PUNCTUATION = 20,
|
||||
HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION, // U_END_PUNCTUATION = 21,
|
||||
HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION, // U_CONNECTOR_PUNCTUATION = 22,
|
||||
HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION, // U_OTHER_PUNCTUATION = 23,
|
||||
HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL, // U_MATH_SYMBOL = 24,
|
||||
HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL, // U_CURRENCY_SYMBOL = 25,
|
||||
HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL, // U_MODIFIER_SYMBOL = 26,
|
||||
HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL, // U_OTHER_SYMBOL = 27,
|
||||
HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION, // U_INITIAL_PUNCTUATION = 28,
|
||||
HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION, // U_FINAL_PUNCTUATION = 29,
|
||||
};
|
||||
#endif
|
||||
|
||||
uint8_t GetGeneralCategory(uint32_t aCh) {
|
||||
#if ENABLE_INTL_API
|
||||
return sICUtoHBcategory[u_charType(aCh)];
|
||||
#else
|
||||
return GetCharProps2(aCh).mCategory;
|
||||
#endif
|
||||
}
|
||||
|
||||
nsCharType GetBidiCat(uint32_t aCh) {
|
||||
#if ENABLE_INTL_API
|
||||
return nsCharType(u_charDirection(aCh));
|
||||
#else
|
||||
return nsCharType(GetCharProps2(aCh).mBidiCategory);
|
||||
#endif
|
||||
}
|
||||
|
||||
int8_t GetNumericValue(uint32_t aCh) {
|
||||
#if ENABLE_INTL_API
|
||||
UNumericType type =
|
||||
UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
|
||||
return type == U_NT_DECIMAL || type == U_NT_DIGIT
|
||||
? int8_t(u_getNumericValue(aCh))
|
||||
: -1;
|
||||
#else
|
||||
return GetCharProps2(aCh).mNumericValue;
|
||||
#endif
|
||||
}
|
||||
|
||||
uint32_t
|
||||
GetMirroredChar(uint32_t aCh)
|
||||
{
|
||||
|
@ -160,14 +230,30 @@ GetCombiningClass(uint32_t aCh)
|
|||
#endif
|
||||
}
|
||||
|
||||
uint8_t
|
||||
GetScriptCode(uint32_t aCh)
|
||||
{
|
||||
#if ENABLE_INTL_API
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
return uscript_getScript(aCh, &err);
|
||||
#else
|
||||
return GetCharProps2(aCh).mScriptCode;
|
||||
#endif
|
||||
}
|
||||
|
||||
uint32_t
|
||||
GetScriptTagForCode(int32_t aScriptCode)
|
||||
{
|
||||
#if ENABLE_INTL_API
|
||||
const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
|
||||
return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
|
||||
#else
|
||||
// this will safely return 0 for negative script codes, too :)
|
||||
if (uint32_t(aScriptCode) > ArrayLength(sScriptCodeToTag)) {
|
||||
return 0;
|
||||
}
|
||||
return sScriptCodeToTag[aScriptCode];
|
||||
#endif
|
||||
}
|
||||
|
||||
PairedBracketType GetPairedBracketType(uint32_t aCh)
|
||||
|
@ -254,6 +340,7 @@ GetTitlecaseForAll(uint32_t aCh)
|
|||
return aCh;
|
||||
}
|
||||
|
||||
#if 0 // currently unused - bug 857481
|
||||
HanVariantType
|
||||
GetHanVariant(uint32_t aCh)
|
||||
{
|
||||
|
@ -272,6 +359,7 @@ GetHanVariant(uint32_t aCh)
|
|||
// extract the appropriate 2-bit field from the value
|
||||
return HanVariantType((v >> ((aCh & 3) * 2)) & 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint32_t
|
||||
GetFullWidth(uint32_t aCh)
|
||||
|
|
|
@ -16,7 +16,7 @@ namespace mozilla {
|
|||
|
||||
namespace unicode {
|
||||
|
||||
extern nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[];
|
||||
extern const nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[];
|
||||
|
||||
// Return whether the char has a mirrored-pair counterpart.
|
||||
uint32_t GetMirroredChar(uint32_t aCh);
|
||||
|
@ -26,25 +26,19 @@ bool HasMirroredChar(uint32_t aChr);
|
|||
uint8_t GetCombiningClass(uint32_t aCh);
|
||||
|
||||
// returns the detailed General Category in terms of HB_UNICODE_* values
|
||||
inline uint8_t GetGeneralCategory(uint32_t aCh) {
|
||||
return GetCharProps2(aCh).mCategory;
|
||||
}
|
||||
uint8_t GetGeneralCategory(uint32_t aCh);
|
||||
|
||||
// returns the simplified Gen Category as defined in nsIUGenCategory
|
||||
inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh) {
|
||||
return sDetailedToGeneralCategory[GetGeneralCategory(aCh)];
|
||||
}
|
||||
|
||||
inline uint8_t GetScriptCode(uint32_t aCh) {
|
||||
return GetCharProps2(aCh).mScriptCode;
|
||||
}
|
||||
nsCharType GetBidiCat(uint32_t aCh);
|
||||
|
||||
uint8_t GetScriptCode(uint32_t aCh);
|
||||
|
||||
uint32_t GetScriptTagForCode(int32_t aScriptCode);
|
||||
|
||||
inline nsCharType GetBidiCat(uint32_t aCh) {
|
||||
return nsCharType(GetCharProps2(aCh).mBidiCategory);
|
||||
}
|
||||
|
||||
/* This MUST match the values assigned by genUnicodePropertyData.pl! */
|
||||
enum VerticalOrientation {
|
||||
VERTICAL_ORIENTATION_U = 0,
|
||||
|
@ -93,10 +87,9 @@ inline XidmodType GetIdentifierModification(uint32_t aCh) {
|
|||
* To restrict to decimal digits, the caller should also check whether
|
||||
* GetGeneralCategory returns HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER
|
||||
*/
|
||||
inline int8_t GetNumericValue(uint32_t aCh) {
|
||||
return GetCharProps2(aCh).mNumericValue;
|
||||
}
|
||||
int8_t GetNumericValue(uint32_t aCh);
|
||||
|
||||
#if 0 // currently unused - bug 857481
|
||||
enum HanVariantType {
|
||||
HVT_NotHan = 0x0,
|
||||
HVT_SimplifiedOnly = 0x1,
|
||||
|
@ -105,6 +98,7 @@ enum HanVariantType {
|
|||
};
|
||||
|
||||
HanVariantType GetHanVariant(uint32_t aCh);
|
||||
#endif
|
||||
|
||||
uint32_t GetFullWidth(uint32_t aCh);
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче