#116030 provide a way to identify langGroup/range for a unicode char

Add some checking code to figure out the range of a unicode char, and use this
piece of information to guide preference font searching.
r=rbs, sr=brendan
This commit is contained in:
shanjian%netscape.com 2002-02-12 04:51:41 +00:00
Родитель 19e6602925
Коммит faa348445e
4 изменённых файлов: 527 добавлений и 28 удалений

Просмотреть файл

@ -63,6 +63,7 @@ OBJS = \
.\$(OBJDIR)\nsGfxFactoryWin.obj \
.\$(OBJDIR)\nsCompressedCharMap.obj \
.\$(OBJDIR)\nsNativeThemeWin.obj \
.\$(OBJDIR)\nsUnicodeRange.obj \
$(NULL)
EXPORTS= nsIRenderingContextWin.h \

Просмотреть файл

@ -59,6 +59,7 @@
#include "prprf.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeRange.h"
#define NOT_SETUP 0x33
static PRBool gIsWIN95OR98 = NOT_SETUP;
@ -3047,6 +3048,8 @@ nsFontMetricsWin::FindGenericFont(HDC aDC, PRUint32 aChar)
return nsnull;
}
// True when lang group atom `a` compares equal (==) to one of the four CJK
// lang group atoms: gJA, gKO, gZHCN or gZHTW.
#define IsCJKLangGroupAtom(a) ((a)==gJA || (a)==gKO || (a)==gZHCN || (a)==gZHTW)
nsFontWin*
nsFontMetricsWin::FindPrefFont(HDC aDC, PRUint32 aChar)
{
@ -3055,37 +3058,70 @@ nsFontMetricsWin::FindPrefFont(HDC aDC, PRUint32 aChar)
return nsnull;
}
nsFont font("", 0, 0, 0, 0, 0);
// Try the pref of the user's ui lang group
// For example, if the ui language is Japanese, try pref from "ja"
// Make localized build work better on other OS
if (gUsersLocale != mLangGroup) {
nsAutoString langGroup;
gUsersLocale->ToString(langGroup);
AppendGenericFontFromPref(font.name,
NS_ConvertUCS2toUTF8(langGroup).get(),
NS_ConvertUCS2toUTF8(mGeneric).get());
}
// Try the pref of the user's system lang group
// For example, if the os language is Simplified Chinese,
// try pref from "zh-CN"
// Make English build work better on other OS
if ((gSystemLocale != mLangGroup) && (gSystemLocale != gUsersLocale)) {
nsAutoString langGroup;
gSystemLocale->ToString(langGroup);
AppendGenericFontFromPref(font.name,
NS_ConvertUCS2toUTF8(langGroup).get(),
NS_ConvertUCS2toUTF8(mGeneric).get());
}
// Also try all the default pref fonts enlisted from other languages
for (int i = 1; i < eCharset_COUNT; ++i) {
nsIAtom* langGroup = NS_NewAtom(gCharsetInfo[i].mLangGroup);
if((gUsersLocale != langGroup) && (gSystemLocale != langGroup)) {
AppendGenericFontFromPref(font.name, gCharsetInfo[i].mLangGroup,
// Sometimes we could not find the font in doc's suggested langGroup (this usually means
// the language specified by doc is incorrect). The characters can, to a certain degree,
// tell us what language it is. This allows us to quickly locate and use a more appropriate
// font as indicated by user's preference. In some situations a set of possible languages may
// be identified instead of a single language (eg. CJK and latin). In this case we have to
// try every language in the set. gUsersLocale and gSystemLocale provide some hints about
// which one should be tried first. This is important for CJK fonts, since the glyph for a single
// char varies dramatically in different languages. For latin languages, their glyphs are
// similar. In fact, they almost always share identical fonts. It would be a waste of time to
// figure out which one comes first. As a final fallback, unicode preference is always tried.
// Classify the character. The result is either a specific range (a value
// below kRangeSpecificItemNum, which maps directly to one lang group), a
// range set (kRangeSetLatin / kRangeSetCJK), or a less frequently used range.
PRUint32 unicodeRange = FindCharUnicodeRange(aChar);
if (unicodeRange < kRangeSpecificItemNum) {
// a single language is identified. Only values below kRangeSpecificItemNum
// have a lang group: LangGroupFromUnicodeRange() returns nsnull for anything
// else, and the range-set branches below must stay reachable for 30 and 31.
AppendGenericFontFromPref(font.name, LangGroupFromUnicodeRange(unicodeRange),
NS_ConvertUCS2toUTF8(mGeneric).get());
} else if (kRangeSetLatin == unicodeRange) {
// Character is from a latin language set, so try western and central european
// If mLangGroup is western or central european, this most probably will not be
// used, but is here as a fallback scenario.
AppendGenericFontFromPref(font.name, "x-western",
NS_ConvertUCS2toUTF8(mGeneric).get());
AppendGenericFontFromPref(font.name, "x-central-euro",
NS_ConvertUCS2toUTF8(mGeneric).get());
} else if (kRangeSetCJK == unicodeRange) {
// CJK, we have to be careful about the order, use locale info as hint
// then try user locale first, if it is CJK
if ((gUsersLocale != mLangGroup) && IsCJKLangGroupAtom(gUsersLocale)) {
const PRUnichar *usersLocaleLangGroup;
gUsersLocale->GetUnicode(&usersLocaleLangGroup);
AppendGenericFontFromPref(font.name, NS_ConvertUCS2toUTF8(usersLocaleLangGroup).get(),
NS_ConvertUCS2toUTF8(mGeneric).get());
}
NS_IF_RELEASE(langGroup);
}
// then system locale (os language)
if ((gSystemLocale != mLangGroup) && (gSystemLocale != gUsersLocale) && IsCJKLangGroupAtom(gSystemLocale)) {
const PRUnichar *systemLocaleLangGroup;
gSystemLocale->GetUnicode(&systemLocaleLangGroup);
AppendGenericFontFromPref(font.name, NS_ConvertUCS2toUTF8(systemLocaleLangGroup).get(),
NS_ConvertUCS2toUTF8(mGeneric).get());
}
// try all other languages in this set.
if (mLangGroup != gJA && gUsersLocale != gJA && gSystemLocale != gJA)
AppendGenericFontFromPref(font.name, "ja",
NS_ConvertUCS2toUTF8(mGeneric).get());
if (mLangGroup != gZHCN && gUsersLocale != gZHCN && gSystemLocale != gZHCN)
AppendGenericFontFromPref(font.name, "zh-CN",
NS_ConvertUCS2toUTF8(mGeneric).get());
if (mLangGroup != gZHTW && gUsersLocale != gZHTW && gSystemLocale != gZHTW)
AppendGenericFontFromPref(font.name, "zh-TW",
NS_ConvertUCS2toUTF8(mGeneric).get());
if (mLangGroup != gKO && gUsersLocale != gKO && gSystemLocale != gKO)
AppendGenericFontFromPref(font.name, "ko",
NS_ConvertUCS2toUTF8(mGeneric).get());
}
// always try unicode as fallback
AppendGenericFontFromPref(font.name, "x-unicode",
NS_ConvertUCS2toUTF8(mGeneric).get());
// use the font list to find font
GenericFontEnumContext context = {aDC, aChar, nsnull, this};
font.EnumerateFamilies(GenericFontEnumCallback, &context);
if (context.mFont) { // a suitable font was found

Просмотреть файл

@ -0,0 +1,354 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Netscape Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the NPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsUnicodeRange.h"
// Maps each specific unicode range constant to the name of its lang group.
// This table depends on the unicode range definitions in nsUnicodeRange.h:
// each item's index must correspond to the unicode range value,
// eg. "x-cyrillic" == gUnicodeRangeToLangGroupTable[kRangeCyrillic].
// It has exactly kRangeSpecificItemNum entries; LangGroupFromUnicodeRange()
// refuses any index at or above that value.
const char *gUnicodeRangeToLangGroupTable[] =
{
"x-cyrillic", // kRangeCyrillic  (0)
"el", // kRangeGreek     (1)
"tr", // kRangeTurkish   (2)
"he", // kRangeHebrew    (3)
"ar", // kRangeArabic    (4)
"x-baltic", // kRangeBaltic    (5)
"th", // kRangeThaiLao   (6)
"ko", // kRangeKorean    (7)
"ja", // kRangeJapanese  (8)
"zh-CN", // kRangeSChinese  (9)
"zh-TW", // kRangeTChinese  (10)
};
/**********************************************************************
* Unicode subranges as defined in unicode 3.0
* x-western, x-central-euro, tr, x-baltic -> latin
* 0000 - 036f
* 1e00 - 1eff
* 2000 - 206f (general punctuation)
* 20a0 - 20cf (currency symbols)
* 2100 - 214f (letterlike symbols)
* 2150 - 218f (Number Forms)
* el -> greek
* 0370 - 03ff
* 1f00 - 1fff
* x-cyrillic -> cyrillic
* 0400 - 04ff
* he -> hebrew
* 0590 - 05ff
* ar -> arabic
* 0600 - 06ff
* fb50 - fdff (arabic presentation forms)
* fe70 - feff (arabic presentation forms b)
* th - thai
* 0e00 - 0e7f
* ko -> korean
* ac00 - d7af (hangul Syllables)
* 1100 - 11ff (jamo)
* 3130 - 318f (hangul compatibility jamo)
* ja
* 3040 - 309f (hiragana)
* 30a0 - 30ff (katakana)
* zh-CN
* zh-TW
*
* CJK
* 3100 - 312f (bopomofo)
* 31a0 - 31bf (bopomofo extended)
* 3000 - 303f (CJK Symbols and Punctuation)
* 2e80 - 2eff (CJK radicals supplement)
* 2f00 - 2fdf (Kangxi Radicals)
* 2ff0 - 2fff (Ideographic Description Characters)
* 3190 - 319f (kanbun)
* 3200 - 32ff (Enclosed CJK letters and Months)
* 3300 - 33ff (CJK compatibility)
* 3400 - 4dbf (CJK Unified Ideographs Extension A)
* 4e00 - 9faf (CJK Unified Ideographs)
* f900 - fa5f (CJK Compatibility Ideographs)
* fe30 - fe4f (CJK compatibility Forms)
* ff00 - ffef (halfwidth and fullwidth forms)
*
* Armenian
* 0530 - 058f
* Syriac
* 0700 - 074f
* Thaana
* 0780 - 07bf
* Devanagari
* 0900 - 097f
* Bengali
* 0980 - 09ff
* Gurmukhi
* 0a00 - 0a7f
* Gujarati
* 0a80 - 0aff
* Oriya
* 0b00 - 0b7f
* Tamil
* 0b80 - 0bff
* Telugu
* 0c00 - 0c7f
* Kannada
* 0c80 - 0cff
* Malayalam
* 0d00 - 0d7f
* Sinhala
* 0d80 - 0def
* Lao
* 0e80 - 0eff
* Tibetan
* 0f00 - 0fbf
* Myanmar
* 1000 - 109f
* Georgian
* 10a0 - 10ff
* Ethiopic
* 1200 - 137f
* Cherokee
* 13a0 - 13ff
* Canadian Aboriginal Syllabics
* 1400 - 167f
* Ogham
* 1680 - 169f
* Runic
* 16a0 - 16ff
* Khmer
* 1780 - 17ff
* Mongolian
* 1800 - 18af
* Misc - superscripts and subscripts
* 2070 - 209f
* Misc - Combining Diacritical Marks for Symbols
* 20d0 - 20ff
* Misc - Arrows
* 2190 - 21ff
* Misc - Mathematical Operators
* 2200 - 22ff
* Misc - Miscellaneous Technical
* 2300 - 23ff
* Misc - Control picture
* 2400 - 243f
* Misc - Optical character recognition
* 2440 - 245f
* Misc - Enclose Alphanumerics
* 2460 - 24ff
* Misc - Box Drawing
* 2500 - 257f
* Misc - Block Elements
* 2580 - 259f
* Misc - Geometric Shapes
* 25a0 - 25ff
* Misc - Miscellaneous Symbols
* 2600 - 267f
* Misc - Dingbats
* 2700 - 27bf
* Misc - Braille Patterns
* 2800 - 28ff
* Yi Syllables
* a000 - a48f
* Yi radicals
* a490 - a4cf
* Alphabetic Presentation Forms
* fb00 - fb4f
* Misc - Combining half Marks
* fe20 - fe2f
* Misc - small form variants
* fe50 - fe6f
* Misc - Specials
* fff0 - ffff
*********************************************************************/
// Two-level lookup table used by FindCharUnicodeRange().
//
// Subtable 0 is indexed by the top nibble of the character (ch >> 12).
// An entry below kRangeTableBase is the final range value. An entry of
// kRangeTableBase+n means "look in subtable n", which is indexed by the
// second nibble of the character ((ch & 0x0f00) >> 8) and always holds
// final range values.
//
// The table is read-only, so it is declared const.
#define NUM_OF_SUBTABLES 7
#define SUBTABLE_SIZE 16
static const PRUint8 gUnicodeSubrangeTable[NUM_OF_SUBTABLES][SUBTABLE_SIZE] =
{
  { // subtable 0: table for X---
    kRangeTableBase+1,            //u0xxx -> subtable 1
    kRangeTableBase+2,            //u1xxx -> subtable 2
    kRangeTableBase+3,            //u2xxx -> subtable 3
    kRangeSetCJK,                 //u3xxx
    kRangeSetCJK,                 //u4xxx
    kRangeSetCJK,                 //u5xxx
    kRangeSetCJK,                 //u6xxx
    kRangeSetCJK,                 //u7xxx
    kRangeSetCJK,                 //u8xxx
    kRangeSetCJK,                 //u9xxx
    kRangeTableBase+4,            //uaxxx -> subtable 4
    kRangeKorean,                 //ubxxx
    kRangeKorean,                 //ucxxx
    kRangeTableBase+5,            //udxxx -> subtable 5
    kRangePrivate,                //uexxx
    kRangeTableBase+6             //ufxxx -> subtable 6
  },
  { // subtable 1: table for 0X--
    kRangeSetLatin,               //u00xx
    kRangeSetLatin,               //u01xx
    kRangeSetLatin,               //u02xx
    kRangeGreek,                  //u03xx XXX 0300-036f is in fact kRangeCombiningDiacriticalMarks
    kRangeCyrillic,               //u04xx
    kRangeHebrew,                 //u05xx XXX 0530-058f is in fact kRangeArmenian
    kRangeArabic,                 //u06xx
    kRangeSriacThaana,            //u07xx
    kRangeUnassigned,             //u08xx
    kRangeDevanagariBengali,      //u09xx
    kRangeGurmukhiGujarati,       //u0axx
    kRangeOriyaTamil,             //u0bxx
    kRangeTeluguKannada,          //u0cxx
    kRangeMalayalamSinhala,       //u0dxx
    kRangeThaiLao,                //u0exx
    kRangeTibetan,                //u0fxx
  },
  { // subtable 2: table for 1X--
    kRangeMyanmarGeorgian,        //u10xx
    kRangeKorean,                 //u11xx
    kRangeEthiopic,               //u12xx
    kRangeEthiopicCherokee,       //u13xx
    kRangeAboriginal,             //u14xx
    kRangeAboriginal,             //u15xx
    kRangeAboriginalOghamRunic,   //u16xx
    kRangeKhmer,                  //u17xx
    kRangeMongolian,              //u18xx
    kRangeUnassigned,             //u19xx
    kRangeUnassigned,             //u1axx
    kRangeUnassigned,             //u1bxx
    kRangeUnassigned,             //u1cxx
    kRangeUnassigned,             //u1dxx
    kRangeSetLatin,               //u1exx
    kRangeGreek,                  //u1fxx
  },
  { // subtable 3: table for 2X--
    kRangeSetLatin,               //u20xx
    kRangeSetLatin,               //u21xx
    kRangeMathOperators,          //u22xx
    kRangeMiscTechical,           //u23xx
    kRangeControlOpticalEnclose,  //u24xx
    kRangeBoxBlockGeometrics,     //u25xx
    kRangeMiscSymbols,            //u26xx
    kRangeDingbats,               //u27xx
    kRangeBraillePattern,         //u28xx
    kRangeUnassigned,             //u29xx
    kRangeUnassigned,             //u2axx
    kRangeUnassigned,             //u2bxx
    kRangeUnassigned,             //u2cxx
    kRangeUnassigned,             //u2dxx
    kRangeSetCJK,                 //u2exx
    kRangeSetCJK,                 //u2fxx
  },
  { // subtable 4: table for aX--
    kRangeYi,                     //ua0xx
    kRangeYi,                     //ua1xx
    kRangeYi,                     //ua2xx
    kRangeYi,                     //ua3xx
    kRangeYi,                     //ua4xx
    kRangeUnassigned,             //ua5xx
    kRangeUnassigned,             //ua6xx
    kRangeUnassigned,             //ua7xx
    kRangeUnassigned,             //ua8xx
    kRangeUnassigned,             //ua9xx
    kRangeUnassigned,             //uaaxx
    kRangeUnassigned,             //uabxx
    kRangeKorean,                 //uacxx
    kRangeKorean,                 //uadxx
    kRangeKorean,                 //uaexx
    kRangeKorean,                 //uafxx
  },
  { // subtable 5: table for dX--
    kRangeKorean,                 //ud0xx
    kRangeKorean,                 //ud1xx
    kRangeKorean,                 //ud2xx
    kRangeKorean,                 //ud3xx
    kRangeKorean,                 //ud4xx
    kRangeKorean,                 //ud5xx
    kRangeKorean,                 //ud6xx
    kRangeKorean,                 //ud7xx
    kRangeSurrogate,              //ud8xx
    kRangeSurrogate,              //ud9xx
    kRangeSurrogate,              //udaxx
    kRangeSurrogate,              //udbxx
    kRangeSurrogate,              //udcxx
    kRangeSurrogate,              //uddxx
    kRangeSurrogate,              //udexx
    kRangeSurrogate,              //udfxx
  },
  { // subtable 6: table for fX--
    kRangePrivate,                //uf0xx
    kRangePrivate,                //uf1xx
    kRangePrivate,                //uf2xx
    kRangePrivate,                //uf3xx
    kRangePrivate,                //uf4xx
    kRangePrivate,                //uf5xx
    kRangePrivate,                //uf6xx
    kRangePrivate,                //uf7xx
    kRangePrivate,                //uf8xx
    kRangeSetCJK,                 //uf9xx
    kRangeSetCJK,                 //ufaxx
    kRangeArabic,                 //ufbxx, includes alphabetic presentation forms
    kRangeArabic,                 //ufcxx
    kRangeArabic,                 //ufdxx
    kRangeArabic,                 //ufexx, includes combining half marks,
                                  //       CJK compatibility forms,
                                  //       small form variants
    kRangeSetCJK,                 //uffxx, halfwidth and fullwidth forms, includes Specials
  }
};
// Returns the unicode range (one of the kRange* constants) that `ch` falls in.
//
// The lookup uses at most two table reads: the top nibble of `ch` selects an
// entry in subtable 0, and when that entry is a redirect (kRangeTableBase+n)
// the second nibble selects the final value in subtable n.
//
// Two levels are almost enough to locate a range; the exceptions are u03xx
// and u05xx. Our font code does not care about the range of Armenian or of
// combining diacritical marks, so they are not discriminated any further.
// Anyone adopting this module for another purpose should be aware of that
// limitation; the implementation can be extended if the need arises.
PRUint32 FindCharUnicodeRange(PRUnichar ch)
{
  // first level: indexed by the most significant nibble
  const PRUint32 firstLevel = gUnicodeSubrangeTable[0][ch >> 12];

  if (firstLevel >= kRangeTableBase) {
    // the entry names another subtable, indexed by the second nibble
    const PRUint32 subtableIndex = firstLevel - kRangeTableBase;
    const PRUint32 secondNibble = (ch >> 8) & 0x0f;
    return gUnicodeSubrangeTable[subtableIndex][secondNibble];
  }

  // the first level already gave us a final range
  return firstLevel;
}

Просмотреть файл

@ -0,0 +1,108 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Netscape Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the NPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nscore.h"
// The following constants define unicode subranges.
// Values below kRangeSpecificItemNum must be continuous so that we can map
// to a lang group directly (they index gUnicodeRangeToLangGroupTable).
// All ranges we care about should be defined under 32, which allows
// us to store a range using the bits of a PRUint32.
// frequently used range definitions
const PRUint8 kRangeCyrillic = 0;
const PRUint8 kRangeGreek = 1;
const PRUint8 kRangeTurkish = 2;
const PRUint8 kRangeHebrew = 3;
const PRUint8 kRangeArabic = 4;
const PRUint8 kRangeBaltic = 5;
const PRUint8 kRangeThaiLao = 6;
const PRUint8 kRangeKorean = 7;
const PRUint8 kRangeJapanese = 8;
const PRUint8 kRangeSChinese = 9;
const PRUint8 kRangeTChinese = 10;
// number of specific ranges defined above; used as the upper bound when
// mapping a range to a lang group
const PRUint8 kRangeSpecificItemNum =11;
// new range/rangeSet definitions can grow into the values that are still
// unused, from kRangeSpecificItemNum up to 29
const PRUint8 kRangeSetStart = 30; // range set definition starts from here
const PRUint8 kRangeSetLatin = 30;
const PRUint8 kRangeSetCJK = 31;
const PRUint8 kRangeSetEnd = 31; // range set definition ends here
// less frequently used range definitions
const PRUint8 kRangeSurrogate = 32;
const PRUint8 kRangePrivate = 33;
const PRUint8 kRangeMisc = 34;
const PRUint8 kRangeUnassigned = 35;
const PRUint8 kRangeSriacThaana = 36;
const PRUint8 kRangeDevanagariBengali = 37;
const PRUint8 kRangeGurmukhiGujarati = 38;
const PRUint8 kRangeOriyaTamil = 39;
const PRUint8 kRangeTeluguKannada = 40;
const PRUint8 kRangeMalayalamSinhala = 41;
const PRUint8 kRangeTibetan = 42;
const PRUint8 kRangeMyanmarGeorgian = 43;
const PRUint8 kRangeEthiopic = 44;
const PRUint8 kRangeEthiopicCherokee = 45;
const PRUint8 kRangeAboriginal = 46;
const PRUint8 kRangeAboriginalOghamRunic = 47;
const PRUint8 kRangeKhmer = 48;
const PRUint8 kRangeMongolian = 49;
const PRUint8 kRangeMathOperators = 50;
const PRUint8 kRangeMiscTechical = 51;
const PRUint8 kRangeControlOpticalEnclose = 52;
const PRUint8 kRangeBoxBlockGeometrics = 53;
const PRUint8 kRangeMiscSymbols = 54;
const PRUint8 kRangeDingbats = 55;
const PRUint8 kRangeBraillePattern = 56;
const PRUint8 kRangeYi = 57;
const PRUint8 kRangeCombiningDiacriticalMarks = 58;
const PRUint8 kRangeArmenian = 59;
// values of 128 and above are reserved for internal use only: in the lookup
// table of nsUnicodeRange.cpp, kRangeTableBase+n redirects to subtable n
const PRUint8 kRangeTableBase = 128;
extern PRUint32 FindCharUnicodeRange(PRUnichar ch);
extern const char* gUnicodeRangeToLangGroupTable[];
// Returns the lang group name for a specific unicode range, or nsnull when
// `unicodeRange` is not one of the specific ranges (i.e. it is not below
// kRangeSpecificItemNum) and therefore has no entry in
// gUnicodeRangeToLangGroupTable.
inline const char* LangGroupFromUnicodeRange(PRUint8 unicodeRange)
{
  if (unicodeRange >= kRangeSpecificItemNum) {
    // range sets and less frequently used ranges have no single lang group
    return nsnull;
  }
  return gUnicodeRangeToLangGroupTable[unicodeRange];
}