updating our unicode range table. bug 376298. r=vlad

This commit is contained in:
pavlov@pavlov.net 2007-04-02 16:19:38 -07:00
Родитель 3ef6fbe62d
Коммит 73357a9d51
1 изменённых файлов: 156 добавлений и 116 удалений

Просмотреть файл

@ -101,7 +101,11 @@ private:
/* Unicode subrange table
* from: http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/unicode_63ub.asp
*/
*
* Use something like:
* perl -pe 's/^(\d+)\s+([\dA-Fa-f]+)\s+-\s+([\dA-Fa-f]+)\s+\b(.*)/ { $1, 0x$2, 0x$3, \"$4\" },/' < unicoderanges.txt
* to generate the below list.
*/
struct UnicodeRangeTableEntry
{
PRUint8 bit;
@ -111,122 +115,158 @@ struct UnicodeRangeTableEntry
};
static const struct UnicodeRangeTableEntry gUnicodeRanges[] = {
{ 0, 0x40, 0x5a, "Basic Latin" },
{ 0, 0x60, 0x7a, "Basic Latin" },
{ 1, 0xa0, 0xff, "Latin-1 Supplement" },
{ 2, 0x100, 0x17f, "Latin Extended-A" },
{ 3, 0x180, 0x24f, "Latin Extended-B" },
{ 4, 0x250, 0x2af, "IPA Extensions" },
{ 5, 0x2b0, 0x2ff, "Spacing Modifier Letters" },
{ 6, 0x300, 0x36f, "Combining Diacritical Marks" },
{ 7, 0x370, 0x3ff, "Greek and Coptic" },
/* 8 - reserved */
{ 9, 0x400, 0x4ff, "Cyrillic" },
{ 9, 0x500, 0x52f, "Cyrillic Supplementary" },
{ 10, 0x530, 0x58f, "Armenian" },
{ 11, 0x590, 0x5ff, "Basic Hebrew" },
{ 0, 0x0000, 0x007F, "Basic Latin" },
{ 1, 0x0080, 0x00FF, "Latin-1 Supplement" },
{ 2, 0x0100, 0x017F, "Latin Extended-A" },
{ 3, 0x0180, 0x024F, "Latin Extended-B" },
{ 4, 0x0250, 0x02AF, "IPA Extensions" },
{ 4, 0x1D00, 0x1D7F, "Phonetic Extensions" },
{ 4, 0x1D80, 0x1DBF, "Phonetic Extensions Supplement" },
{ 5, 0x02B0, 0x02FF, "Spacing Modifier Letters" },
{ 5, 0xA700, 0xA71F, "Modifier Tone Letters" },
{ 6, 0x0300, 0x036F, "Combining Diacritical Marks" },
{ 6, 0x1DC0, 0x1DFF, "Combining Diacritical Marks Supplement" },
{ 7, 0x0370, 0x03FF, "Greek and Coptic" },
{ 8, 0x2C80, 0x2CFF, "Coptic" },
{ 9, 0x0400, 0x04FF, "Cyrillic" },
{ 9, 0x0500, 0x052F, "Cyrillic Supplementary" },
{ 10, 0x0530, 0x058F, "Armenian" },
{ 11, 0x0590, 0x05FF, "Basic Hebrew" },
/* 12 - reserved */
{ 13, 0x600, 0x6ff, "Basic Arabic" },
/* 14 - reserved */
{ 15, 0x900, 0x97f, "Devanagari" },
{ 16, 0x980, 0x9ff, "Bengali" },
{ 17, 0xa00, 0xa7f, "Gurmukhi" },
{ 18, 0xa80, 0xaff, "Gujarati" },
{ 19, 0xb00, 0xb7f, "Oriya" },
{ 20, 0xb80, 0xbff, "Tamil" },
{ 21, 0xc00, 0xc7f, "Telugu" },
{ 22, 0xc80, 0xcff, "Kannada" },
{ 23, 0xd00, 0xd7f, "Malayalam" },
{ 24, 0xe00, 0xe7f, "Thai" },
{ 25, 0xe80, 0xeff, "Lao" },
{ 26, 0x10a0, 0x10ff, "Georgian" },
/* 27 - reserved */
{ 28, 0x1100, 0x11ff, "Hangul Jamo" },
{ 29, 0x1e00, 0x1eff, "Latin Extended Additional" },
{ 30, 0x1f00, 0x1fff, "Greek Extended" },
{ 31, 0x2000, 0x206f, "General Punctuation" },
{ 32, 0x2070, 0x209f, "Subscripts and Superscripts" },
{ 33, 0x20a0, 0x20cf, "Currency Symbols" },
{ 34, 0x20d0, 0x20ff, "Combining Diacritical Marks for Symbols" },
{ 35, 0x2100, 0x214f, "Letter-like Symbols" },
{ 36, 0x2150, 0x218f, "Number Forms" },
{ 37, 0x2190, 0x21ff, "Arrows" },
{ 37, 0x27f0, 0x27ff, "Supplemental Arrows-A" },
{ 37, 0x2900, 0x297f, "Supplemental Arrows-B" },
{ 38, 0x2200, 0x22ff, "Mathematical Operators" },
{ 38, 0x2a00, 0x2aff, "Supplemental Mathematical Operators" },
{ 38, 0x27c0, 0x27ef, "Miscellaneous Mathematical Symbols-A" },
{ 38, 0x2980, 0x29ff, "Miscellaneous Mathematical Symbols-B" },
{ 39, 0x2300, 0x23ff, "Miscellaneous Technical" },
{ 40, 0x2400, 0x243f, "Control Pictures" },
{ 41, 0x2440, 0x245f, "Optical Character Recognition" },
{ 42, 0x2460, 0x24ff, "Enclosed Alphanumerics" },
{ 43, 0x2500, 0x257f, "Box Drawing" },
{ 44, 0x2580, 0x259f, "Block Elements" },
{ 45, 0x25a0, 0x25ff, "Geometric Shapes" },
{ 46, 0x2600, 0x26ff, "Miscellaneous Symbols" },
{ 47, 0x2700, 0x27bf, "Dingbats" },
{ 48, 0x3000, 0x303f, "Chinese, Japanese, and Korean (CJK) Symbols and Punctuation" },
{ 49, 0x3040, 0x309f, "Hiragana" },
{ 50, 0x30a0, 0x30ff, "Katakana" },
{ 50, 0x31f0, 0x31ff, "Katakana Phonetic Extensions" },
{ 51, 0x3100, 0x312f, "Bopomofo" },
{ 51, 0x31a0, 0x31bf, "Extended Bopomofo" },
{ 52, 0x3130, 0x318f, "Hangul Compatibility Jamo" },
/* 53 - reserved */
{ 54, 0x3200, 0x32ff, "Enclosed CJK Letters and Months" },
{ 55, 0x3300, 0x33ff, "CJK Compatibility" },
{ 56, 0xac00, 0xd7a3, "Hangul" },
{ 57, 0xd800, 0xdfff, "Surrogates. Note that setting this bit implies that there is at least one supplementary code point (beyond the Basic Multilingual Plane, or BMP) that is supported by this font. See Surrogates and Supplementary Characters." },
/* 58 - reserved */
{ 59, 0x4e00, 0x9fff, "CJK Unified Ideographs" },
{ 59, 0x2e80, 0x2eff, "CJK Radicals Supplement" },
{ 59, 0x2f00, 0x2fdf, "Kangxi Radicals" },
{ 59, 0x2ff0, 0x2fff, "Ideographic Description" },
{ 59, 0x3190, 0x319f, "Kanbun" },
{ 59, 0x3400, 0x4dbf, "CJK Unified Ideographs Extension A" },
{ 59, 0x20000, 0x2a6df, "CJK Unified Ideographs Extension B" },
{ 60, 0xe000, 0xf8ff, "Private Use Area" },
{ 61, 0xf900, 0xfaff, "CJK Compatibility Ideographs" },
{ 61, 0x2f800, 0x2fa1f, "CJK Compatibility Ideographs Supplement" },
{ 62, 0xfb00, 0xfb4f, "Alphabetical Presentation Forms" },
{ 63, 0xfb50, 0xfdff, "Arabic Presentation Forms-A" },
{ 64, 0xfe20, 0xfe2f, "Combining Half Marks" },
{ 65, 0xfe30, 0xfe4f, "CJK Compatibility Forms" },
{ 66, 0xfe50, 0xfe6f, "Small Form Variants" },
{ 67, 0xfe70, 0xfefe, "Arabic Presentation Forms-B" },
{ 68, 0xff00, 0xffef, "Halfwidth and Fullwidth Forms" },
{ 69, 0xfff0, 0xffff, "Specials" },
{ 70, 0xf00, 0xfff, "Tibetan" },
{ 71, 0x700, 0x74f, "Syriac" },
{ 72, 0x780, 0x7bf, "Thaana" },
{ 73, 0xd80, 0xdff, "Sinhala" },
{ 74, 0x1000, 0x109f, "Myanmar" },
{ 75, 0x1200, 0x12bf, "Ethiopic" },
{ 76, 0x13a0, 0x13ff, "Cherokee" },
{ 77, 0x1400, 0x167f, "Canadian Aboriginal Syllabics" },
{ 78, 0x1680, 0x169f, "Ogham" },
{ 79, 0x16a0, 0x16ff, "Runic" },
{ 80, 0x1780, 0x17ff, "Khmer" },
{ 80, 0x19e0, 0x19ff, "Khmer Symbols" },
{ 81, 0x1800, 0x18af, "Mongolian" },
{ 82, 0x2800, 0x28ff, "Braille" },
{ 83, 0xa000, 0xa48f, "Yi" },
{ 83, 0xa480, 0xa4cf, "Yi Radicals" },
{ 84, 0x1700, 0x171f, "Tagalog" },
{ 84, 0x1720, 0x173f, "Hanunoo" },
{ 84, 0x1740, 0x175f, "Buhid" },
{ 84, 0x1760, 0x177f, "Tagbanwa" },
{ 85, 0x10300, 0x1032f, "Old Italic" },
{ 86, 0x10330, 0x1034f, "Gothic" },
{ 87, 0x10440, 0x1044f, "Deseret" },
{ 88, 0x1d000, 0x1d0ff, "Byzantine Musical Symbols" },
{ 88, 0x1d100, 0x1d1ff, "Musical Symbols" },
{ 89, 0x1d400, 0x1d7ff, "Mathematical Alphanumeric Symbols" },
{ 90, 0xfff80, 0xfffff, "Private Use (Plane 15)" },
{ 90, 0x10ff80, 0x10ffff, "Private Use (Plane 16)" },
{ 91, 0xe0100, 0xe01ef, "Variation Selectors" },
{ 92, 0xe0000, 0xe007f , "Tags" }
{ 13, 0x0600, 0x06FF, "Basic Arabic" },
{ 13, 0x0750, 0x077F, "Arabic Supplement" },
{ 14, 0x07C0, 0x07FF, "N'Ko" },
{ 15, 0x0900, 0x097F, "Devanagari" },
{ 16, 0x0980, 0x09FF, "Bengali" },
{ 17, 0x0A00, 0x0A7F, "Gurmukhi" },
{ 18, 0x0A80, 0x0AFF, "Gujarati" },
{ 19, 0x0B00, 0x0B7F, "Oriya" },
{ 20, 0x0B80, 0x0BFF, "Tamil" },
{ 21, 0x0C00, 0x0C7F, "Telugu" },
{ 22, 0x0C80, 0x0CFF, "Kannada" },
{ 23, 0x0D00, 0x0D7F, "Malayalam" },
{ 24, 0x0E00, 0x0E7F, "Thai" },
{ 25, 0x0E80, 0x0EFF, "Lao" },
{ 26, 0x10A0, 0x10FF, "Georgian" },
{ 26, 0x2D00, 0x2D2F, "Georgian Supplement" },
{ 27, 0x1B00, 0x1B7F, "Balinese" },
{ 28, 0x1100, 0x11FF, "Hangul Jamo" },
{ 29, 0x1E00, 0x1EFF, "Latin Extended Additional" },
{ 29, 0x2C60, 0x2C7F, "Latin Extended-C" },
{ 30, 0x1F00, 0x1FFF, "Greek Extended" },
{ 31, 0x2000, 0x206F, "General Punctuation" },
{ 31, 0x2E00, 0x2E7F, "Supplemental Punctuation" },
{ 32, 0x2070, 0x209F, "Subscripts and Superscripts" },
{ 33, 0x20A0, 0x20CF, "Currency Symbols" },
{ 34, 0x20D0, 0x20FF, "Combining Diacritical Marks for Symbols" },
{ 35, 0x2100, 0x214F, "Letter-like Symbols" },
{ 36, 0x2150, 0x218F, "Number Forms" },
{ 37, 0x2190, 0x21FF, "Arrows" },
{ 37, 0x27F0, 0x27FF, "Supplemental Arrows-A" },
{ 37, 0x2900, 0x297F, "Supplemental Arrows-B" },
{ 37, 0x2B00, 0x2BFF, "Miscellaneous Symbols and Arrows" },
{ 38, 0x2200, 0x22FF, "Mathematical Operators" },
{ 38, 0x27C0, 0x27EF, "Miscellaneous Mathematical Symbols-A" },
{ 38, 0x2980, 0x29FF, "Miscellaneous Mathematical Symbols-B" },
{ 38, 0x2A00, 0x2AFF, "Supplemental Mathematical Operators" },
{ 39, 0x2300, 0x23FF, "Miscellaneous Technical" },
{ 40, 0x2400, 0x243F, "Control Pictures" },
{ 41, 0x2440, 0x245F, "Optical Character Recognition" },
{ 42, 0x2460, 0x24FF, "Enclosed Alphanumerics" },
{ 43, 0x2500, 0x257F, "Box Drawing" },
{ 44, 0x2580, 0x259F, "Block Elements" },
{ 45, 0x25A0, 0x25FF, "Geometric Shapes" },
{ 46, 0x2600, 0x26FF, "Miscellaneous Symbols" },
{ 47, 0x2700, 0x27BF, "Dingbats" },
{ 48, 0x3000, 0x303F, "Chinese, Japanese, and Korean (CJK) Symbols and Punctuation" },
{ 49, 0x3040, 0x309F, "Hiragana" },
{ 50, 0x30A0, 0x30FF, "Katakana" },
{ 50, 0x31F0, 0x31FF, "Katakana Phonetic Extensions" },
{ 51, 0x3100, 0x312F, "Bopomofo" },
{ 51, 0x31A0, 0x31BF, "Extended Bopomofo" },
{ 52, 0x3130, 0x318F, "Hangul Compatibility Jamo" },
{ 53, 0xA840, 0xA87F, "Phags-pa" },
{ 54, 0x3200, 0x32FF, "Enclosed CJK Letters and Months" },
{ 55, 0x3300, 0x33FF, "CJK Compatibility" },
{ 56, 0xAC00, 0xD7A3, "Hangul" },
{ 57, 0xD800, 0xDFFF, "Surrogates. Note that setting this bit implies that there is at least one supplementary code point beyond the Basic Multilingual Plane (BMP) that is supported by this font. See Surrogates and Supplementary Characters." },
{ 58, 0x10900, 0x1091F, "Phoenician" },
{ 59, 0x2E80, 0x2EFF, "CJK Radicals Supplement" },
{ 59, 0x2F00, 0x2FDF, "Kangxi Radicals" },
{ 59, 0x2FF0, 0x2FFF, "Ideographic Description Characters" },
{ 59, 0x3190, 0x319F, "Kanbun" },
{ 59, 0x3400, 0x4DBF, "CJK Unified Ideographs Extension A" },
{ 59, 0x4E00, 0x9FFF, "CJK Unified Ideographs" },
{ 59, 0x20000, 0x2A6DF, "CJK Unified Ideographs Extension B" },
{ 60, 0xE000, 0xF8FF, "Private Use (Plane 0)" },
{ 61, 0x31C0, 0x31EF, "CJK Base Strokes" },
{ 61, 0xF900, 0xFAFF, "CJK Compatibility Ideographs" },
{ 61, 0x2F800, 0x2FA1F, "CJK Compatibility Ideographs Supplement" },
{ 62, 0xFB00, 0xFB4F, "Alphabetical Presentation Forms" },
{ 63, 0xFB50, 0xFDFF, "Arabic Presentation Forms-A" },
{ 64, 0xFE20, 0xFE2F, "Combining Half Marks" },
{ 65, 0xFE10, 0xFE1F, "Vertical Forms" },
{ 65, 0xFE30, 0xFE4F, "CJK Compatibility Forms" },
{ 66, 0xFE50, 0xFE6F, "Small Form Variants" },
{ 67, 0xFE70, 0xFEFE, "Arabic Presentation Forms-B" },
{ 68, 0xFF00, 0xFFEF, "Halfwidth and Fullwidth Forms" },
{ 69, 0xFFF0, 0xFFFF, "Specials" },
{ 70, 0x0F00, 0x0FFF, "Tibetan" },
{ 71, 0x0700, 0x074F, "Syriac" },
{ 72, 0x0780, 0x07BF, "Thaana" },
{ 73, 0x0D80, 0x0DFF, "Sinhala" },
{ 74, 0x1000, 0x109F, "Myanmar" },
{ 75, 0x1200, 0x137F, "Ethiopic" },
{ 75, 0x1380, 0x139F, "Ethiopic Supplement" },
{ 75, 0x2D80, 0x2DDF, "Ethiopic Extended" },
{ 76, 0x13A0, 0x13FF, "Cherokee" },
{ 77, 0x1400, 0x167F, "Canadian Aboriginal Syllabics" },
{ 78, 0x1680, 0x169F, "Ogham" },
{ 79, 0x16A0, 0x16FF, "Runic" },
{ 80, 0x1780, 0x17FF, "Khmer" },
{ 80, 0x19E0, 0x19FF, "Khmer Symbols" },
{ 81, 0x1800, 0x18AF, "Mongolian" },
{ 82, 0x2800, 0x28FF, "Braille" },
{ 83, 0xA000, 0xA48F, "Yi" },
{ 83, 0xA490, 0xA4CF, "Yi Radicals" },
{ 84, 0x1700, 0x171F, "Tagalog" },
{ 84, 0x1720, 0x173F, "Hanunoo" },
{ 84, 0x1740, 0x175F, "Buhid" },
{ 84, 0x1760, 0x177F, "Tagbanwa" },
{ 85, 0x10300, 0x1032F, "Old Italic" },
{ 86, 0x10330, 0x1034F, "Gothic" },
{ 87, 0x10400, 0x1044F, "Deseret" },
{ 88, 0x1D000, 0x1D0FF, "Byzantine Musical Symbols" },
{ 88, 0x1D100, 0x1D1FF, "Musical Symbols" },
{ 88, 0x1D200, 0x1D24F, "Ancient Greek Musical Notation" },
{ 89, 0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols" },
{ 90, 0xF0000, 0xFFFFD, "Private Use (Plane 15)" },
{ 90, 0x100000, 0x10FFFD, "Private Use (Plane 16)" },
{ 91, 0xFE00, 0xFE0F, "Variation Selectors" },
{ 91, 0xE0100, 0xE01EF, "Variation Selectors Supplement" },
{ 92, 0xE0000, 0xE007F, "Tags" },
{ 93, 0x1900, 0x194F, "Limbu" },
{ 94, 0x1950, 0x197F, "Tai Le" },
{ 95, 0x1980, 0x19DF, "New Tai Lue" },
{ 96, 0x1A00, 0x1A1F, "Buginese" },
{ 97, 0x2C00, 0x2C5F, "Glagolitic" },
{ 98, 0x2D30, 0x2D7F, "Tifinagh" },
{ 99, 0x4DC0, 0x4DFF, "Yijing Hexagram Symbols" },
{ 100, 0xA800, 0xA82F, "Syloti Nagri" },
{ 101, 0x10000, 0x1007F, "Linear B Syllabary" },
{ 101, 0x10080, 0x100FF, "Linear B Ideograms" },
{ 101, 0x10100, 0x1013F, "Aegean Numbers" },
{ 102, 0x10140, 0x1018F, "Ancient Greek Numbers" },
{ 103, 0x10380, 0x1039F, "Ugaritic" },
{ 104, 0x103A0, 0x103DF, "Old Persian" },
{ 105, 0x10450, 0x1047F, "Shavian" },
{ 106, 0x10480, 0x104AF, "Osmanya" },
{ 107, 0x10800, 0x1083F, "Cypriot Syllabary" },
{ 108, 0x10A00, 0x10A5F, "Kharoshthi" },
{ 109, 0x1D300, 0x1D35F, "Tai Xuan Jing Symbols" },
{ 110, 0x12000, 0x123FF, "Cuneiform" },
{ 110, 0x12400, 0x1247F, "Cuneiform Numbers and Punctuation" },
{ 111, 0x1D360, 0x1D37F, "Counting Rod Numerals" }
};
static PRUint8 CharRangeBit(PRUint32 ch) {