/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsLanguageAtomService.h" #include "nsUConvPropertySearch.h" #include "nsUnicharUtils.h" #include "nsAtom.h" #include "nsGkAtoms.h" #include "mozilla/ArrayUtils.h" #include "mozilla/ClearOnShutdown.h" #include "mozilla/Encoding.h" #include "mozilla/intl/OSPreferences.h" #include "mozilla/ServoBindings.h" #include "mozilla/ServoUtils.h" using namespace mozilla; using mozilla::intl::OSPreferences; static constexpr nsUConvProp encodingsGroups[] = { #include "encodingsgroups.properties.h" }; // List of mozilla internal x-* tags that map to themselves (see bug 256257) static constexpr nsStaticAtom* kLangGroups[] = { // This list must be sorted! nsGkAtoms::x_armn, nsGkAtoms::x_cyrillic, nsGkAtoms::x_devanagari, nsGkAtoms::x_geor, nsGkAtoms::x_math, nsGkAtoms::x_tamil, nsGkAtoms::Unicode, nsGkAtoms::x_western // These self-mappings are not necessary unless somebody use them to specify // lang in (X)HTML/XML documents, which they shouldn't. (see bug 256257) // x-beng=x-beng // x-cans=x-cans // x-ethi=x-ethi // x-guru=x-guru // x-gujr=x-gujr // x-khmr=x-khmr // x-mlym=x-mlym }; // Map ISO 15924 script codes from BCP47 lang tag to mozilla's langGroups. static constexpr struct { const char* mTag; nsStaticAtom* mAtom; } kScriptLangGroup[] = { // This list must be sorted by script code! {"Arab", nsGkAtoms::ar}, {"Armn", nsGkAtoms::x_armn}, {"Beng", nsGkAtoms::x_beng}, {"Cans", nsGkAtoms::x_cans}, {"Cyrl", nsGkAtoms::x_cyrillic}, {"Deva", nsGkAtoms::x_devanagari}, {"Ethi", nsGkAtoms::x_ethi}, {"Geok", nsGkAtoms::x_geor}, {"Geor", nsGkAtoms::x_geor}, {"Grek", nsGkAtoms::el}, {"Gujr", nsGkAtoms::x_gujr}, {"Guru", nsGkAtoms::x_guru}, {"Hang", nsGkAtoms::ko}, {"Hani", nsGkAtoms::Japanese}, {"Hans", nsGkAtoms::Chinese}, // Hant is special-cased in code // Hant=zh-HK // Hant=zh-TW {"Hebr", nsGkAtoms::he}, {"Hira", nsGkAtoms::Japanese}, {"Jpan", nsGkAtoms::Japanese}, {"Kana", nsGkAtoms::Japanese}, {"Khmr", nsGkAtoms::x_khmr}, {"Knda", nsGkAtoms::x_knda}, {"Kore", nsGkAtoms::ko}, {"Latn", nsGkAtoms::x_western}, {"Mlym", nsGkAtoms::x_mlym}, {"Orya", nsGkAtoms::x_orya}, {"Sinh", nsGkAtoms::x_sinh}, {"Taml", nsGkAtoms::x_tamil}, {"Telu", nsGkAtoms::x_telu}, {"Thai", nsGkAtoms::th}, {"Tibt", nsGkAtoms::x_tibt}}; // static nsLanguageAtomService* nsLanguageAtomService::GetService() { static UniquePtr gLangAtomService; if (!gLangAtomService) { gLangAtomService = MakeUnique(); ClearOnShutdown(&gLangAtomService); } return gLangAtomService.get(); } nsStaticAtom* nsLanguageAtomService::LookupLanguage( const nsACString& aLanguage) { nsAutoCString lowered(aLanguage); ToLowerCase(lowered); RefPtr lang = NS_Atomize(lowered); return GetLanguageGroup(lang); } already_AddRefed nsLanguageAtomService::LookupCharSet( NotNull aEncoding) { nsAutoCString charset; aEncoding->Name(charset); nsAutoCString group; if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue( encodingsGroups, ArrayLength(encodingsGroups), charset, group))) { return RefPtr(nsGkAtoms::Unicode).forget(); } return NS_Atomize(group); } nsAtom* nsLanguageAtomService::GetLocaleLanguage() { do { if (!mLocaleLanguage) { AutoTArray regionalPrefsLocales; if (NS_SUCCEEDED(OSPreferences::GetInstance()->GetRegionalPrefsLocales( regionalPrefsLocales))) { // use lowercase for all language atoms ToLowerCase(regionalPrefsLocales[0]); mLocaleLanguage = NS_Atomize(regionalPrefsLocales[0]); } else { nsAutoCString locale; OSPreferences::GetInstance()->GetSystemLocale(locale); ToLowerCase(locale); // use lowercase for all language atoms mLocaleLanguage = NS_Atomize(locale); } } } while (0); return mLocaleLanguage; } nsStaticAtom* nsLanguageAtomService::GetLanguageGroup(nsAtom* aLanguage, bool* aNeedsToCache) { if (nsStaticAtom* group = mLangToGroup.Get(aLanguage)) { return group; } if (aNeedsToCache) { *aNeedsToCache = true; return nullptr; } AssertIsMainThreadOrServoFontMetricsLocked(); nsStaticAtom* group = GetUncachedLanguageGroup(aLanguage); mLangToGroup.Put(aLanguage, group); return group; } nsStaticAtom* nsLanguageAtomService::GetUncachedLanguageGroup( nsAtom* aLanguage) const { nsAutoCString langStr; aLanguage->ToUTF8String(langStr); ToLowerCase(langStr); if (langStr[0] == 'x' && langStr[1] == '-') { // Internal x-* langGroup codes map to themselves (see bug 256257) for (nsStaticAtom* langGroup : kLangGroups) { if (langGroup == aLanguage) { return langGroup; } if (aLanguage->IsAsciiLowercase()) { continue; } // Do the slow ascii-case-insensitive comparison just if needed. nsDependentAtomString string(langGroup); if (string.EqualsASCII(langStr.get(), langStr.Length())) { return langGroup; } } } else { // If the lang code can be parsed as BCP47, look up its (likely) script Locale loc(langStr); if (loc.IsWellFormed()) { if (loc.GetScript().IsEmpty()) { loc.AddLikelySubtags(); } if (loc.GetScript().EqualsLiteral("Hant")) { if (loc.GetRegion().EqualsLiteral("HK")) { return nsGkAtoms::HongKongChinese; } return nsGkAtoms::Taiwanese; } else { size_t foundIndex; const nsCString& script = loc.GetScript(); if (BinarySearchIf( kScriptLangGroup, 0, ArrayLength(kScriptLangGroup), [script](const auto& entry) -> int { return script.Compare(entry.mTag); }, &foundIndex)) { return kScriptLangGroup[foundIndex].mAtom; } } } } // Fall back to x-unicode if no match was found return nsGkAtoms::Unicode; }