pjs/gfx/thebes/nsUnicodeRange.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "nsUnicodeRange.h"
#include "nsIAtom.h"
#include "gfxAtoms.h"

// Maps a unicode range value to the address of its language-group atom.
// This table depends on the unicode range definitions: each item's index
// must correspond to its unicode range value,
// e.g. x-cyrillic = gUnicodeRangeToLangGroupAtomTable[kRangeCyrillic].
// LangGroupFromUnicodeRange() indexes this table directly with any range
// value below kRangeSpecificItemNum, so that constant must never exceed the
// number of entries listed here.
static nsIAtom **gUnicodeRangeToLangGroupAtomTable[] =
{
  &gfxAtoms::x_cyrillic,
  &gfxAtoms::el,
  &gfxAtoms::tr,
  &gfxAtoms::he,
  &gfxAtoms::ar,
  &gfxAtoms::x_baltic,
  &gfxAtoms::th,
  &gfxAtoms::ko,
  &gfxAtoms::ja,
  &gfxAtoms::zh_cn,
  &gfxAtoms::zh_tw,
  &gfxAtoms::x_devanagari,
  &gfxAtoms::x_tamil,
  &gfxAtoms::x_armn,
  &gfxAtoms::x_beng,
  &gfxAtoms::x_cans,
  &gfxAtoms::x_ethi,
  &gfxAtoms::x_geor,
  &gfxAtoms::x_gujr,
  &gfxAtoms::x_guru,
  &gfxAtoms::x_khmr,
  &gfxAtoms::x_mlym,
  &gfxAtoms::x_orya,
  &gfxAtoms::x_telu,
  &gfxAtoms::x_knda,
  &gfxAtoms::x_sinh,
  &gfxAtoms::x_tibt
};

/**********************************************************************
 * Unicode subranges as defined in unicode 3.0
 * x-western, x-central-euro, tr, x-baltic  -> latin 
 *  0000 - 036f 
 *  1e00 - 1eff
 *  2000 - 206f  (general punctuation)
 *  20a0 - 20cf  (currency symbols)
 *  2100 - 214f  (letterlike symbols)
 *  2150 - 218f  (Number Forms)
 * el         -> greek
 *  0370 - 03ff
 *  1f00 - 1fff
 * x-cyrillic -> cyrillic
 *  0400 - 04ff
 * he         -> hebrew
 *  0590 - 05ff
 * ar         -> arabic
 *  0600 - 06ff
 *  fb50 - fdff (arabic presentation forms)
 *  fe70 - feff (arabic presentation forms b)
 * th - thai
 *  0e00 - 0e7f
 * ko        -> korean
 *  ac00 - d7af  (hangul Syllables)
 *  1100 - 11ff    (jamo)
 *  3130 - 318f (hangul compatibility jamo)
 * ja
 *  3040 - 309f (hiragana)
 *  30a0 - 30ff (katakana)
 * zh-CN
 * zh-TW
 *
 * CJK
 *  3100 - 312f (bopomofo)
 *  31a0 - 31bf (bopomofo extended)
 *  3000 - 303f (CJK Symbols and Punctuation) 
 *  2e80 - 2eff (CJK radicals supplement)
 *  2f00 - 2fdf (Kangxi Radicals)
 *  2ff0 - 2fff (Ideographic Description Characters)
 *  3190 - 319f (kanbun)
 *  3200 - 32ff (Enclosed CJK letters and Months)
 *  3300 - 33ff (CJK compatibility)
 *  3400 - 4dbf (CJK Unified Ideographs Extension A)
 *  4e00 - 9faf (CJK Unified Ideographs)
 *  f900 - fa5f (CJK Compatibility Ideographs)
 *  fe30 - fe4f (CJK compatibility Forms)
 *  ff00 - ffef (halfwidth and fullwidth forms)
 *
 * Armenian
 *  0530 - 058f 
 * Syriac
 *  0700 - 074f
 * Thaana
 *  0780 - 07bf
 * Devanagari
 *  0900 - 097f
 * Bengali
 *  0980 - 09ff
 * Gurmukhi
 *  0a00 - 0a7f
 * Gujarati
 *  0a80 - 0aff
 * Oriya
 *  0b00 - 0b7f
 * Tamil
 *  0b80 - 0bff
 * Telugu
 *  0c00 - 0c7f
 * Kannada
 *  0c80 - 0cff
 * Malayalam
 *  0d00 - 0d7f
 * Sinhala
 *  0d80 - 0def
 * Lao
 *  0e80 - 0eff
 * Tibetan
 *  0f00 - 0fbf
 * Myanmar
 *  1000 - 109f
 * Georgian
 *  10a0 - 10ff
 * Ethiopic
 *  1200 - 137f
 * Cherokee
 *  13a0 - 13ff
 * Canadian Aboriginal Syllabics
 *  1400 - 167f
 * Ogham
 *  1680 - 169f
 * Runic 
 *  16a0 - 16ff
 * Khmer
 *  1780 - 17ff
 * Mongolian
 *  1800 - 18af
 * Misc - superscripts and subscripts
 *  2070 - 209f
 * Misc - Combining Diacritical Marks for Symbols
 *  20d0 - 20ff
 * Misc - Arrows
 *  2190 - 21ff
 * Misc - Mathematical Operators
 *  2200 - 22ff
 * Misc - Miscellaneous Technical
 *  2300 - 23ff
 * Misc - Control picture
 *  2400 - 243f
 * Misc - Optical character recognition
 *  2440 - 245f
 * Misc - Enclose Alphanumerics
 *  2460 - 24ff
 * Misc - Box Drawing 
 *  2500 - 257f
 * Misc - Block Elements
 *  2580 - 259f
 * Misc - Geometric Shapes
 *  25a0 - 25ff
 * Misc - Miscellaneous Symbols
 *  2600 - 267f
 * Misc - Dingbats
 *  2700 - 27bf
 * Misc - Braille Patterns
 *  2800 - 28ff
 * Yi Syllables
 *  a000 - a48f
 * Yi radicals
 *  a490 - a4cf
 * Alphabetic Presentation Forms
 *  fb00 - fb4f
 * Misc - Combining half Marks
 *  fe20 - fe2f
 * Misc - small form variants
 *  fe50 - fe6f
 * Misc - Specials
 *  fff0 - ffff
 *********************************************************************/


// Multi-level lookup table used by FindCharUnicodeRange().
//
// Sub-table 0 is indexed by the most significant hex digit of the code
// unit. An entry below kRangeTableBase is a final range value. An entry of
// the form kRangeTableBase+N means "continue in sub-table N", which is
// indexed by the next hex digit. Inside a second-level sub-table an entry
// can additionally be kRangeTertiaryTable, which means "resolve through
// gUnicodeTertiaryRangeTable instead".
//
// The order of the sub-tables and of the entries inside them is significant:
// every position corresponds to a fixed hex digit value.
#define NUM_OF_SUBTABLES      9
#define SUBTABLE_SIZE         16

static const PRUint8 gUnicodeSubrangeTable[NUM_OF_SUBTABLES][SUBTABLE_SIZE] = 
{ 
  { // sub-table 0: table for X--- (indexed by the first hex digit)
    kRangeTableBase+1,  //u0xxx
    kRangeTableBase+2,  //u1xxx
    kRangeTableBase+3,  //u2xxx
    kRangeSetCJK,       //u3xxx
    kRangeSetCJK,       //u4xxx
    kRangeSetCJK,       //u5xxx
    kRangeSetCJK,       //u6xxx
    kRangeSetCJK,       //u7xxx
    kRangeSetCJK,       //u8xxx
    kRangeSetCJK,       //u9xxx
    kRangeTableBase+4,  //uaxxx
    kRangeKorean,       //ubxxx
    kRangeKorean,       //ucxxx
    kRangeTableBase+5,  //udxxx
    kRangePrivate,      //uexxx
    kRangeTableBase+6   //ufxxx
  },
  { // sub-table 1 (kRangeTableBase+1): table for 0X--
    kRangeSetLatin,          //u00xx
    kRangeSetLatin,          //u01xx
    kRangeSetLatin,          //u02xx
    kRangeGreek,             //u03xx     XXX 0300-036f is in fact kRangeCombiningDiacriticalMarks
    kRangeCyrillic,          //u04xx
    kRangeTableBase+7,       //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
    kRangeArabic,            //u06xx
    kRangeTertiaryTable,     //u07xx
    kRangeUnassigned,        //u08xx
    kRangeTertiaryTable,     //u09xx
    kRangeTertiaryTable,     //u0axx
    kRangeTertiaryTable,     //u0bxx
    kRangeTertiaryTable,     //u0cxx
    kRangeTertiaryTable,     //u0dxx
    kRangeTertiaryTable,     //u0exx
    kRangeTibetan,           //u0fxx
  },
  { // sub-table 2 (kRangeTableBase+2): table for 1X--
    kRangeTertiaryTable,     //u10xx
    kRangeKorean,            //u11xx
    kRangeEthiopic,          //u12xx
    kRangeTertiaryTable,     //u13xx
    kRangeCanadian,          //u14xx
    kRangeCanadian,          //u15xx
    kRangeTertiaryTable,     //u16xx
    kRangeKhmer,             //u17xx
    kRangeMongolian,         //u18xx
    kRangeUnassigned,        //u19xx
    kRangeUnassigned,        //u1axx
    kRangeUnassigned,        //u1bxx
    kRangeUnassigned,        //u1cxx
    kRangeUnassigned,        //u1dxx
    kRangeSetLatin,          //u1exx
    kRangeGreek,             //u1fxx
  },
  { // sub-table 3 (kRangeTableBase+3): table for 2X--
    kRangeSetLatin,          //u20xx
    kRangeSetLatin,          //u21xx
    kRangeMathOperators,     //u22xx
    kRangeMiscTechnical,     //u23xx
    kRangeControlOpticalEnclose, //u24xx
    kRangeBoxBlockGeometrics, //u25xx
    kRangeMiscSymbols,       //u26xx
    kRangeDingbats,          //u27xx
    kRangeBraillePattern,    //u28xx
    kRangeUnassigned,        //u29xx
    kRangeUnassigned,        //u2axx
    kRangeUnassigned,        //u2bxx
    kRangeUnassigned,        //u2cxx
    kRangeUnassigned,        //u2dxx
    kRangeSetCJK,            //u2exx
    kRangeSetCJK,            //u2fxx
  },
  {  // sub-table 4 (kRangeTableBase+4): table for aX--
    kRangeYi,                //ua0xx
    kRangeYi,                //ua1xx
    kRangeYi,                //ua2xx
    kRangeYi,                //ua3xx
    kRangeYi,                //ua4xx
    kRangeUnassigned,        //ua5xx
    kRangeUnassigned,        //ua6xx
    kRangeUnassigned,        //ua7xx
    kRangeUnassigned,        //ua8xx
    kRangeUnassigned,        //ua9xx
    kRangeUnassigned,        //uaaxx
    kRangeUnassigned,        //uabxx
    kRangeKorean,            //uacxx
    kRangeKorean,            //uadxx
    kRangeKorean,            //uaexx
    kRangeKorean,            //uafxx
  },
  {  // sub-table 5 (kRangeTableBase+5): table for dX--
    kRangeKorean,            //ud0xx
    kRangeKorean,            //ud1xx
    kRangeKorean,            //ud2xx
    kRangeKorean,            //ud3xx
    kRangeKorean,            //ud4xx
    kRangeKorean,            //ud5xx
    kRangeKorean,            //ud6xx
    kRangeKorean,            //ud7xx
    kRangeSurrogate,         //ud8xx
    kRangeSurrogate,         //ud9xx
    kRangeSurrogate,         //udaxx
    kRangeSurrogate,         //udbxx
    kRangeSurrogate,         //udcxx
    kRangeSurrogate,         //uddxx
    kRangeSurrogate,         //udexx
    kRangeSurrogate,         //udfxx
  },
  { // sub-table 6 (kRangeTableBase+6): table for fX--
    kRangePrivate,           //uf0xx 
    kRangePrivate,           //uf1xx 
    kRangePrivate,           //uf2xx 
    kRangePrivate,           //uf3xx 
    kRangePrivate,           //uf4xx 
    kRangePrivate,           //uf5xx 
    kRangePrivate,           //uf6xx 
    kRangePrivate,           //uf7xx 
    kRangePrivate,           //uf8xx 
    kRangeSetCJK,            //uf9xx 
    kRangeSetCJK,            //ufaxx 
    kRangeArabic,            //ufbxx, includes alphabetic presentation forms
    kRangeArabic,            //ufcxx
    kRangeArabic,            //ufdxx
    kRangeArabic,            //ufexx, includes Combining half marks, 
                             //                CJK compatibility forms, 
                             //                small form variants
    kRangeTableBase+8,       //uffxx, halfwidth and fullwidth forms, includes Specials
  },
  { // sub-table 7 (kRangeTableBase+7): table for 0x0500 - 0x05ff,
    // indexed by the third hex digit
    kRangeCyrillic,          //u050x
    kRangeCyrillic,          //u051x
    kRangeCyrillic,          //u052x
    kRangeArmenian,          //u053x
    kRangeArmenian,          //u054x
    kRangeArmenian,          //u055x
    kRangeArmenian,          //u056x
    kRangeArmenian,          //u057x
    kRangeArmenian,          //u058x
    kRangeHebrew,            //u059x
    kRangeHebrew,            //u05ax
    kRangeHebrew,            //u05bx
    kRangeHebrew,            //u05cx
    kRangeHebrew,            //u05dx
    kRangeHebrew,            //u05ex
    kRangeHebrew,            //u05fx
  },
  { // sub-table 8 (kRangeTableBase+8): table for 0xff00 - 0xffff,
    // indexed by the third hex digit
    kRangeSetCJK,            //uff0x, fullwidth latin
    kRangeSetCJK,            //uff1x, fullwidth latin
    kRangeSetCJK,            //uff2x, fullwidth latin
    kRangeSetCJK,            //uff3x, fullwidth latin
    kRangeSetCJK,            //uff4x, fullwidth latin
    kRangeSetCJK,            //uff5x, fullwidth latin
    kRangeSetCJK,            //uff6x, halfwidth katakana
    kRangeSetCJK,            //uff7x, halfwidth katakana
    kRangeSetCJK,            //uff8x, halfwidth katakana
    kRangeSetCJK,            //uff9x, halfwidth katakana
    kRangeSetCJK,            //uffax, halfwidth hangul jamo
    kRangeSetCJK,            //uffbx, halfwidth hangul jamo
    kRangeSetCJK,            //uffcx, halfwidth hangul jamo
    kRangeSetCJK,            //uffdx, halfwidth hangul jamo
    kRangeSetCJK,            //uffex, fullwidth symbols
    kRangeSpecials,          //ufffx, Specials
  },
};

// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80) 
// code points, so the number of entries in the tertiary range
// table for that range is obtained by dividing (0x1700 - 0x0700) by 128.
// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal 
// syllabaries take multiple chunks, and Ogham and Runic share a single chunk.
#define TERTIARY_TABLE_SIZE ((0x1700 - 0x0700) / 0x80)

// Third-level lookup used by FindCharUnicodeRange(): entry i covers the 128
// code points starting at 0x0700 + i * 0x80 (the index is computed as
// (ch - 0x0700) >> 7). Entries marked "place holder" sit under a secondary
// table row that already yields a final range, so FindCharUnicodeRange()
// never reaches them; they only keep the index arithmetic uniform.
static const PRUint8 gUnicodeTertiaryRangeTable[TERTIARY_TABLE_SIZE] =
{ //table for 0x0700 - 0x16ff; each comment gives the start of a 0x80 chunk
    kRangeSyriac,            //u070x
    kRangeThaana,            //u078x
    kRangeUnassigned,        //u080x  place holder(resolved in the 2ndary tab.)
    kRangeUnassigned,        //u088x  place holder(resolved in the 2ndary tab.)
    kRangeDevanagari,        //u090x
    kRangeBengali,           //u098x
    kRangeGurmukhi,          //u0a0x
    kRangeGujarati,          //u0a8x
    kRangeOriya,             //u0b0x
    kRangeTamil,             //u0b8x
    kRangeTelugu,            //u0c0x
    kRangeKannada,           //u0c8x
    kRangeMalayalam,         //u0d0x
    kRangeSinhala,           //u0d8x
    kRangeThai,              //u0e0x  
    kRangeLao,               //u0e8x
    kRangeTibetan,           //u0f0x  place holder(resolved in the 2ndary tab.)
    kRangeTibetan,           //u0f8x  place holder(resolved in the 2ndary tab.)
    kRangeMyanmar,           //u100x
    kRangeGeorgian,          //u108x
    kRangeKorean,            //u110x  place holder(resolved in the 2ndary tab.)
    kRangeKorean,            //u118x  place holder(resolved in the 2ndary tab.)
    kRangeEthiopic,          //u120x  place holder(resolved in the 2ndary tab.)
    kRangeEthiopic,          //u128x  place holder(resolved in the 2ndary tab.)
    kRangeEthiopic,          //u130x  
    kRangeCherokee,          //u138x
    kRangeCanadian,          //u140x  place holder(resolved in the 2ndary tab.)
    kRangeCanadian,          //u148x  place holder(resolved in the 2ndary tab.)
    kRangeCanadian,          //u150x  place holder(resolved in the 2ndary tab.)
    kRangeCanadian,          //u158x  place holder(resolved in the 2ndary tab.)
    kRangeCanadian,          //u160x  
    kRangeOghamRunic,        //u168x  this contains two scripts, Ogham & Runic
};

// Returns the unicode range value for the code unit |ch| by walking the
// lookup tables one hex digit at a time.
//
// Two levels (first and second hex digit) settle most code units. A third
// hex digit is consulted only where a second-level entry points at another
// sub-table (u05xx and uffxx). u03xx is deliberately not split further, so
// the combining diacritical marks in 0300-036f are reported as Greek; that
// is good enough for font selection, but other users of this module should
// be aware of the limitation and extend the tables if they need more.
//
// Indic, Southeast Asian and some other scripts between U+0700 and U+16FF
// are resolved through gUnicodeTertiaryRangeTable, which works on blocks of
// 128 code points.
PRUint32 FindCharUnicodeRange(PRUnichar ch)
{
  // Level 1: most significant hex digit.
  const PRUint32 firstLevel = gUnicodeSubrangeTable[0][ch >> 12];
  if (firstLevel < kRangeTableBase) {
    // Already a specific range, nothing more to look up.
    return firstLevel;
  }

  // Level 2: second hex digit, in the sub-table selected by level 1.
  const PRUint32 secondLevel =
    gUnicodeSubrangeTable[firstLevel - kRangeTableBase][(ch >> 8) & 0x0f];
  if (secondLevel < kRangeTableBase) {
    return secondLevel;
  }

  if (secondLevel >= kRangeTertiaryTable) {
    // U+0700 - U+16FF: one tertiary entry per 128 code points.
    return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
  }

  // Level 3: third hex digit, in the sub-table selected by level 2.
  return gUnicodeSubrangeTable[secondLevel - kRangeTableBase][(ch >> 4) & 0x0f];
}

// Returns the language-group atom registered for |unicodeRange| in
// gUnicodeRangeToLangGroupAtomTable, or nsnull when the value is not one of
// the specific ranges (i.e. it is kRangeSpecificItemNum or greater).
nsIAtom *LangGroupFromUnicodeRange(PRUint8 unicodeRange)
{
  if (unicodeRange >= kRangeSpecificItemNum) {
    // Not a range that has a language group of its own.
    return nsnull;
  }

  // The table stores the address of each atom pointer; hand back the atom.
  return *gUnicodeRangeToLangGroupAtomTable[unicodeRange];
}
fixing windows fonts (bug 340590). r=vlad. 2006-06-10 04:21:05 +04:00			`/* -- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -- */`
			`/* *** BEGIN LICENSE BLOCK ***`
			`* Version: MPL 1.1/GPL 2.0/LGPL 2.1`
			`*`
			`* The contents of this file are subject to the Mozilla Public License Version`
			`* 1.1 (the "License"); you may not use this file except in compliance with`
			`* the License. You may obtain a copy of the License at`
			`* http://www.mozilla.org/MPL/`
			`*`
			`* Software distributed under the License is distributed on an "AS IS" basis,`
			`* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License`
			`* for the specific language governing rights and limitations under the`
			`* License.`
			`*`
			`* The Original Code is mozilla.org code.`
			`*`
			`* The Initial Developer of the Original Code is`
			`* Netscape Communications Corporation.`
			`* Portions created by the Initial Developer are Copyright (C) 1998`
			`* the Initial Developer. All Rights Reserved.`
			`*`
			`* Contributor(s):`
			`*`
			`* Alternatively, the contents of this file may be used under the terms of`
			`* either of the GNU General Public License Version 2 or later (the "GPL"),`
			`* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),`
			`* in which case the provisions of the GPL or the LGPL are applicable instead`
			`* of those above. If you wish to allow use of your version of this file only`
			`* under the terms of either the GPL or the LGPL, and not to allow others to`
			`* use your version of this file under the terms of the MPL, indicate your`
			`* decision by deleting the provisions above and replace them with the notice`
			`* and other provisions required by the GPL or the LGPL. If you do not delete`
			`* the provisions above, a recipient may use your version of this file under`
			`* the terms of any one of the MPL, the GPL or the LGPL.`
			`*`
			`* *** END LICENSE BLOCK *** */`

			`#include "nsUnicodeRange.h"`
Bug 524107 - part 2 - store language as atom instead of string in gfxFontStyle. r=roc sr=mats 2010-02-24 20:57:57 +03:00			`#include "nsIAtom.h"`
			`#include "gfxAtoms.h"`
fixing windows fonts (bug 340590). r=vlad. 2006-06-10 04:21:05 +04:00
			`// This table depends on unicode range definitions.`
			`// Each item's index must correspond unicode range value`
			`// eg. x-cyrillic = LangGroupTable[kRangeCyrillic]`
Bug 524107 - part 2 - store language as atom instead of string in gfxFontStyle. r=roc sr=mats 2010-02-24 20:57:57 +03:00			`static nsIAtom **gUnicodeRangeToLangGroupAtomTable[] =`
fixing windows fonts (bug 340590). r=vlad. 2006-06-10 04:21:05 +04:00			`{`
Bug 524107 - part 2 - store language as atom instead of string in gfxFontStyle. r=roc sr=mats 2010-02-24 20:57:57 +03:00			`&gfxAtoms::x_cyrillic,`
			`&gfxAtoms::el,`
			`&gfxAtoms::tr,`
			`&gfxAtoms::he,`
			`&gfxAtoms::ar,`
			`&gfxAtoms::x_baltic,`
			`&gfxAtoms::th,`
			`&gfxAtoms::ko,`
			`&gfxAtoms::ja,`
			`&gfxAtoms::zh_cn,`
			`&gfxAtoms::zh_tw,`
			`&gfxAtoms::x_devanagari,`
			`&gfxAtoms::x_tamil,`
			`&gfxAtoms::x_armn,`
			`&gfxAtoms::x_beng,`
			`&gfxAtoms::x_cans,`
			`&gfxAtoms::x_ethi,`
			`&gfxAtoms::x_geor,`
			`&gfxAtoms::x_gujr,`
			`&gfxAtoms::x_guru,`
			`&gfxAtoms::x_khmr,`
			`&gfxAtoms::x_mlym,`
			`&gfxAtoms::x_orya,`
			`&gfxAtoms::x_telu,`
			`&gfxAtoms::x_knda,`
Add Tibetan to font selection UI. Bug 441110, r=emk 2010-03-04 11:55:53 +03:00			`&gfxAtoms::x_sinh,`
			`&gfxAtoms::x_tibt`
fixing windows fonts (bug 340590). r=vlad. 2006-06-10 04:21:05 +04:00			`};`

			`/**********************************************************************`
			`* Unicode subranges as defined in unicode 3.0`
			`* x-western, x-central-euro, tr, x-baltic -> latin`
			`* 0000 - 036f`
			`* 1e00 - 1eff`
			`* 2000 - 206f (general punctuation)`
			`* 20a0 - 20cf (currency symbols)`
			`* 2100 - 214f (letterlike symbols)`
			`* 2150 - 218f (Number Forms)`
			`* el -> greek`
			`* 0370 - 03ff`
			`* 1f00 - 1fff`
			`* x-cyrillic -> cyrillic`
			`* 0400 - 04ff`
			`* he -> hebrew`
			`* 0590 - 05ff`
			`* ar -> arabic`
			`* 0600 - 06ff`
			`* fb50 - fdff (arabic presentation forms)`
			`* fe70 - feff (arabic presentation forms b)`
			`* th - thai`
			`* 0e00 - 0e7f`
			`* ko -> korean`
			`* ac00 - d7af (hangul Syllables)`
			`* 1100 - 11ff (jamo)`
			`* 3130 - 318f (hangul compatibility jamo)`
			`* ja`
			`* 3040 - 309f (hiragana)`
			`* 30a0 - 30ff (katakana)`
			`* zh-CN`
			`* zh-TW`
			`*`
			`* CJK`
			`* 3100 - 312f (bopomofo)`
			`* 31a0 - 31bf (bopomofo extended)`
			`* 3000 - 303f (CJK Symbols and Punctuation)`
			`* 2e80 - 2eff (CJK radicals supplement)`
			`* 2f00 - 2fdf (Kangxi Radicals)`
			`* 2ff0 - 2fff (Ideographic Description Characters)`
			`* 3190 - 319f (kanbun)`
			`* 3200 - 32ff (Enclosed CJK letters and Months)`
			`* 3300 - 33ff (CJK compatibility)`
			`* 3400 - 4dbf (CJK Unified Ideographs Extension A)`
			`* 4e00 - 9faf (CJK Unified Ideographs)`
			`* f900 - fa5f (CJK Compatibility Ideographs)`
			`* fe30 - fe4f (CJK compatibility Forms)`
			`* ff00 - ffef (halfwidth and fullwidth forms)`
			`*`
			`* Armenian`
			`* 0530 - 058f`
			`* Sriac`
			`* 0700 - 074f`
			`* Thaana`
			`* 0780 - 07bf`
			`* Devanagari`
			`* 0900 - 097f`
			`* Bengali`
			`* 0980 - 09ff`
			`* Gurmukhi`
			`* 0a00 - 0a7f`
			`* Gujarati`
			`* 0a80 - 0aff`
			`* Oriya`
			`* 0b00 - 0b7f`
			`* Tamil`
			`* 0b80 - 0bff`
			`* Telugu`
			`* 0c00 - 0c7f`
			`* Kannada`
			`* 0c80 - 0cff`
			`* Malayalam`
			`* 0d00 - 0d7f`
			`* Sinhala`
			`* 0d80 - 0def`
			`* Lao`
			`* 0e80 - 0eff`
			`* Tibetan`
			`* 0f00 - 0fbf`
			`* Myanmar`
			`* 1000 - 109f`
			`* Georgian`
			`* 10a0 - 10ff`
			`* Ethiopic`
			`* 1200 - 137f`
			`* Cherokee`
			`* 13a0 - 13ff`
			`* Canadian Aboriginal Syllabics`
			`* 1400 - 167f`
			`* Ogham`
			`* 1680 - 169f`
			`* Runic`
			`* 16a0 - 16ff`
			`* Khmer`
			`* 1780 - 17ff`
			`* Mongolian`
			`* 1800 - 18af`
			`* Misc - superscripts and subscripts`
			`* 2070 - 209f`
			`* Misc - Combining Diacritical Marks for Symbols`
			`* 20d0 - 20ff`
			`* Misc - Arrows`
			`* 2190 - 21ff`
			`* Misc - Mathematical Operators`
			`* 2200 - 22ff`
			`* Misc - Miscellaneous Technical`
			`* 2300 - 23ff`
			`* Misc - Control picture`
			`* 2400 - 243f`
			`* Misc - Optical character recognition`
			`* 2440 - 2450`
			`* Misc - Enclose Alphanumerics`
			`* 2460 - 24ff`
			`* Misc - Box Drawing`
			`* 2500 - 257f`
			`* Misc - Block Elements`
			`* 2580 - 259f`
			`* Misc - Geometric Shapes`
			`* 25a0 - 25ff`
			`* Misc - Miscellaneous Symbols`
			`* 2600 - 267f`
			`* Misc - Dingbats`
			`* 2700 - 27bf`
			`* Misc - Braille Patterns`
			`* 2800 - 28ff`
			`* Yi Syllables`
			`* a000 - a48f`
			`* Yi radicals`
			`* a490 - a4cf`
			`* Alphabetic Presentation Forms`
			`* fb00 - fb4f`
			`* Misc - Combining half Marks`
			`* fe20 - fe2f`
			`* Misc - small form variants`
			`* fe50 - fe6f`
			`* Misc - Specials`
			`* fff0 - ffff`
			`*********************************************************************/`



			`#define NUM_OF_SUBTABLES 9`
			`#define SUBTABLE_SIZE 16`

			`static const PRUint8 gUnicodeSubrangeTable[NUM_OF_SUBTABLES][SUBTABLE_SIZE] =`
			`{`
			`{ // table for X---`
			`kRangeTableBase+1, //u0xxx`
			`kRangeTableBase+2, //u1xxx`
			`kRangeTableBase+3, //u2xxx`
			`kRangeSetCJK, //u3xxx`
			`kRangeSetCJK, //u4xxx`
			`kRangeSetCJK, //u5xxx`
			`kRangeSetCJK, //u6xxx`
			`kRangeSetCJK, //u7xxx`
			`kRangeSetCJK, //u8xxx`
			`kRangeSetCJK, //u9xxx`
			`kRangeTableBase+4, //uaxxx`
			`kRangeKorean, //ubxxx`
			`kRangeKorean, //ucxxx`
			`kRangeTableBase+5, //udxxx`
			`kRangePrivate, //uexxx`
			`kRangeTableBase+6 //ufxxx`
			`},`
			`{ //table for 0X--`
			`kRangeSetLatin, //u00xx`
			`kRangeSetLatin, //u01xx`
			`kRangeSetLatin, //u02xx`
			`kRangeGreek, //u03xx XXX 0300-036f is in fact kRangeCombiningDiacriticalMarks`
			`kRangeCyrillic, //u04xx`
			`kRangeTableBase+7, //u05xx, includes Cyrillic supplement, Hebrew, and Armenian`
			`kRangeArabic, //u06xx`
			`kRangeTertiaryTable, //u07xx`
			`kRangeUnassigned, //u08xx`
			`kRangeTertiaryTable, //u09xx`
			`kRangeTertiaryTable, //u0axx`
			`kRangeTertiaryTable, //u0bxx`
			`kRangeTertiaryTable, //u0cxx`
			`kRangeTertiaryTable, //u0dxx`
			`kRangeTertiaryTable, //u0exx`
			`kRangeTibetan, //u0fxx`
			`},`
			`{ //table for 1x--`
			`kRangeTertiaryTable, //u10xx`
			`kRangeKorean, //u11xx`
			`kRangeEthiopic, //u12xx`
			`kRangeTertiaryTable, //u13xx`
			`kRangeCanadian, //u14xx`
			`kRangeCanadian, //u15xx`
			`kRangeTertiaryTable, //u16xx`
			`kRangeKhmer, //u17xx`
			`kRangeMongolian, //u18xx`
			`kRangeUnassigned, //u19xx`
			`kRangeUnassigned, //u1axx`
			`kRangeUnassigned, //u1bxx`
			`kRangeUnassigned, //u1cxx`
			`kRangeUnassigned, //u1dxx`
			`kRangeSetLatin, //u1exx`
			`kRangeGreek, //u1fxx`
			`},`
			`{ //table for 2x--`
			`kRangeSetLatin, //u20xx`
			`kRangeSetLatin, //u21xx`
			`kRangeMathOperators, //u22xx`
			`kRangeMiscTechnical, //u23xx`
			`kRangeControlOpticalEnclose, //u24xx`
			`kRangeBoxBlockGeometrics, //u25xx`
			`kRangeMiscSymbols, //u26xx`
			`kRangeDingbats, //u27xx`
			`kRangeBraillePattern, //u28xx`
			`kRangeUnassigned, //u29xx`
			`kRangeUnassigned, //u2axx`
			`kRangeUnassigned, //u2bxx`
			`kRangeUnassigned, //u2cxx`
			`kRangeUnassigned, //u2dxx`
			`kRangeSetCJK, //u2exx`
			`kRangeSetCJK, //u2fxx`
			`},`
			`{ //table for ax--`
			`kRangeYi, //ua0xx`
			`kRangeYi, //ua1xx`
			`kRangeYi, //ua2xx`
			`kRangeYi, //ua3xx`
			`kRangeYi, //ua4xx`
			`kRangeUnassigned, //ua5xx`
			`kRangeUnassigned, //ua6xx`
			`kRangeUnassigned, //ua7xx`
			`kRangeUnassigned, //ua8xx`
			`kRangeUnassigned, //ua9xx`
			`kRangeUnassigned, //uaaxx`
			`kRangeUnassigned, //uabxx`
			`kRangeKorean, //uacxx`
			`kRangeKorean, //uadxx`
			`kRangeKorean, //uaexx`
			`kRangeKorean, //uafxx`
			`},`
			`{ //table for dx--`
			`kRangeKorean, //ud0xx`
			`kRangeKorean, //ud1xx`
			`kRangeKorean, //ud2xx`
			`kRangeKorean, //ud3xx`
			`kRangeKorean, //ud4xx`
			`kRangeKorean, //ud5xx`
			`kRangeKorean, //ud6xx`
			`kRangeKorean, //ud7xx`
			`kRangeSurrogate, //ud8xx`
			`kRangeSurrogate, //ud9xx`
			`kRangeSurrogate, //udaxx`
			`kRangeSurrogate, //udbxx`
			`kRangeSurrogate, //udcxx`
			`kRangeSurrogate, //uddxx`
			`kRangeSurrogate, //udexx`
			`kRangeSurrogate, //udfxx`
			`},`
			`{ // table for fx--`
			`kRangePrivate, //uf0xx`
			`kRangePrivate, //uf1xx`
			`kRangePrivate, //uf2xx`
			`kRangePrivate, //uf3xx`
			`kRangePrivate, //uf4xx`
			`kRangePrivate, //uf5xx`
			`kRangePrivate, //uf6xx`
			`kRangePrivate, //uf7xx`
			`kRangePrivate, //uf8xx`
			`kRangeSetCJK, //uf9xx`
			`kRangeSetCJK, //ufaxx`
			`kRangeArabic, //ufbxx, includes alphabic presentation form`
			`kRangeArabic, //ufcxx`
			`kRangeArabic, //ufdxx`
			`kRangeArabic, //ufexx, includes Combining half marks,`
			`// CJK compatibility forms,`
			`// CJK compatibility forms,`
			`// small form variants`
			`kRangeTableBase+8, //uffxx, halfwidth and fullwidth forms, includes Specials`
			`},`
			`{ //table for 0x0500 - 0x05ff`
			`kRangeCyrillic, //u050x`
			`kRangeCyrillic, //u051x`
			`kRangeCyrillic, //u052x`
			`kRangeArmenian, //u053x`
			`kRangeArmenian, //u054x`
			`kRangeArmenian, //u055x`
			`kRangeArmenian, //u056x`
			`kRangeArmenian, //u057x`
			`kRangeArmenian, //u058x`
			`kRangeHebrew, //u059x`
			`kRangeHebrew, //u05ax`
			`kRangeHebrew, //u05bx`
			`kRangeHebrew, //u05cx`
			`kRangeHebrew, //u05dx`
			`kRangeHebrew, //u05ex`
			`kRangeHebrew, //u05fx`
			`},`
			`{ //table for 0xff00 - 0xffff`
			`kRangeSetCJK, //uff0x, fullwidth latin`
			`kRangeSetCJK, //uff1x, fullwidth latin`
			`kRangeSetCJK, //uff2x, fullwidth latin`
			`kRangeSetCJK, //uff3x, fullwidth latin`
			`kRangeSetCJK, //uff4x, fullwidth latin`
			`kRangeSetCJK, //uff5x, fullwidth latin`
			`kRangeSetCJK, //uff6x, halfwidth katakana`
			`kRangeSetCJK, //uff7x, halfwidth katakana`
			`kRangeSetCJK, //uff8x, halfwidth katakana`
			`kRangeSetCJK, //uff9x, halfwidth katakana`
			`kRangeSetCJK, //uffax, halfwidth hangul jamo`
			`kRangeSetCJK, //uffbx, halfwidth hangul jamo`
			`kRangeSetCJK, //uffcx, halfwidth hangul jamo`
			`kRangeSetCJK, //uffdx, halfwidth hangul jamo`
			`kRangeSetCJK, //uffex, fullwidth symbols`
			`kRangeSpecials, //ufffx, Specials`
			`},`
			`};`

			`// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)`
			`// code points so that the number of entries in the tertiary range`
			`// table for that range is obtained by dividing (0x1700 - 0x0700) by 128.`
			`// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal`
			`// syllabaries take multiple chunks and Ogham and Runic share a single chunk.`
			`#define TERTIARY_TABLE_SIZE ((0x1700 - 0x0700) / 0x80)`

			`static const PRUint8 gUnicodeTertiaryRangeTable[TERTIARY_TABLE_SIZE] =`
			`{ //table for 0x0700 - 0x1600`
			`kRangeSyriac, //u070x`
			`kRangeThaana, //u078x`
			`kRangeUnassigned, //u080x place holder(resolved in the 2ndary tab.)`
			`kRangeUnassigned, //u088x place holder(resolved in the 2ndary tab.)`
			`kRangeDevanagari, //u090x`
			`kRangeBengali, //u098x`
			`kRangeGurmukhi, //u0a0x`
			`kRangeGujarati, //u0a8x`
			`kRangeOriya, //u0b0x`
			`kRangeTamil, //u0b8x`
			`kRangeTelugu, //u0c0x`
			`kRangeKannada, //u0c8x`
			`kRangeMalayalam, //u0d0x`
			`kRangeSinhala, //u0d8x`
			`kRangeThai, //u0e0x`
			`kRangeLao, //u0e8x`
			`kRangeTibetan, //u0f0x place holder(resolved in the 2ndary tab.)`
			`kRangeTibetan, //u0f8x place holder(resolved in the 2ndary tab.)`
			`kRangeMyanmar, //u100x`
			`kRangeGeorgian, //u108x`
			`kRangeKorean, //u110x place holder(resolved in the 2ndary tab.)`
			`kRangeKorean, //u118x place holder(resolved in the 2ndary tab.)`
			`kRangeEthiopic, //u120x place holder(resolved in the 2ndary tab.)`
			`kRangeEthiopic, //u128x place holder(resolved in the 2ndary tab.)`
			`kRangeEthiopic, //u130x`
			`kRangeCherokee, //u138x`
			`kRangeCanadian, //u140x place holder(resolved in the 2ndary tab.)`
			`kRangeCanadian, //u148x place holder(resolved in the 2ndary tab.)`
			`kRangeCanadian, //u150x place holder(resolved in the 2ndary tab.)`
			`kRangeCanadian, //u158x place holder(resolved in the 2ndary tab.)`
			`kRangeCanadian, //u160x`
			`kRangeOghamRunic, //u168x this contains two scripts, Ogham & Runic`
			`};`

			`// A two level index is almost enough for locating a range, with the`
			`// exception of u03xx and u05xx. Since we don't really care about range for`
			`// combining diacritical marks in our font application, they are`
			`// not discriminated further. But future adoption of this module for other use`
			`// should be aware of this limitation. The implementation can be extended if`
			`// there is such a need.`
			`// For Indic, Southeast Asian scripts and some other scripts between`
			`// U+0700 and U+16FF, it's extended to the third level.`
			`PRUint32 FindCharUnicodeRange(PRUnichar ch)`
			`{`
			`PRUint32 range;`

			`//search the first table`
			`range = gUnicodeSubrangeTable[0][ch >> 12];`

			`if (range < kRangeTableBase)`
			`// we try to get a specific range`
			`return range;`

			`// otherwise, we have one more table to look at`
			`range = gUnicodeSubrangeTable[range - kRangeTableBase][(ch & 0x0f00) >> 8];`
			`if (range < kRangeTableBase)`
			`return range;`
			`if (range < kRangeTertiaryTable)`
			`return gUnicodeSubrangeTable[range - kRangeTableBase][(ch & 0x00f0) >> 4];`

			`// Yet another table to look at : U+0700 - U+16FF : 128 code point blocks`
			`return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];`
			`}`

Bug 524107 - part 2 - store language as atom instead of string in gfxFontStyle. r=roc sr=mats 2010-02-24 20:57:57 +03:00			`nsIAtom *LangGroupFromUnicodeRange(PRUint8 unicodeRange)`
fixing windows fonts (bug 340590). r=vlad. 2006-06-10 04:21:05 +04:00			`{`
Bug 524107 - part 2 - store language as atom instead of string in gfxFontStyle. r=roc sr=mats 2010-02-24 20:57:57 +03:00			`if (kRangeSpecificItemNum > unicodeRange) {`
			`nsIAtom **atom = gUnicodeRangeToLangGroupAtomTable[unicodeRange];`
			`return *atom;`
			`}`
fixing windows fonts (bug 340590). r=vlad. 2006-06-10 04:21:05 +04:00			`return nsnull;`
			`}`