From 3684744df4333103a114803c6752eb8ea0434116 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Bargull?= Date: Thu, 4 May 2017 05:04:14 -0700 Subject: [PATCH] Bug 1321789 - Support Unicode extensions with multiple value subtags in BCP47 language tags. r=Waldo --HG-- extra : rebase_source : e53994788a06eda8435c314735d22410f45b2c81 --- js/src/builtin/Intl.cpp | 79 +++---- js/src/builtin/Intl.js | 194 ++++++++++++------ .../Intl/DateTimeFormat/calendar-aliases.js | 35 ++++ js/src/tests/Intl/DateTimeFormat/islamic.js | 89 ++++++++ 4 files changed, 298 insertions(+), 99 deletions(-) create mode 100644 js/src/tests/Intl/DateTimeFormat/calendar-aliases.js create mode 100644 js/src/tests/Intl/DateTimeFormat/islamic.js diff --git a/js/src/builtin/Intl.cpp b/js/src/builtin/Intl.cpp index 60795cc6e6e6..380e75832577 100644 --- a/js/src/builtin/Intl.cpp +++ b/js/src/builtin/Intl.cpp @@ -83,8 +83,8 @@ using mozilla::RangedPtr; * bit rot. The following stub implementations for ICU functions make this * possible. The functions using them should never be called, so they assert * and return error codes. Signatures adapted from ICU header files locid.h, - * numsys.h, ucal.h, ucol.h, udat.h, udatpg.h, uenum.h, unum.h; see the ICU - * directory for license. + * numsys.h, ucal.h, ucol.h, udat.h, udatpg.h, uenum.h, unum.h, uloc.h; + * see the ICU directory for license. */ namespace { @@ -831,6 +831,12 @@ u_strToUpper(UChar* dest, int32_t destCapacity, const UChar* src, int32_t srcLen MOZ_CRASH("u_strToUpper: Intl API disabled"); } +const char* +uloc_toUnicodeLocaleType(const char* keyword, const char* value) +{ + MOZ_CRASH("uloc_toUnicodeLocaleType: Intl API disabled"); +} + } // anonymous namespace #endif @@ -887,11 +893,8 @@ LegacyIntlInitialize(JSContext* cx, HandleObject obj, Handle init // CountAvailable and GetAvailable describe the signatures used for ICU API // to determine available locales for various functionality. -typedef int32_t -(* CountAvailable)(); - -typedef const char* -(* GetAvailable)(int32_t localeIndex); +using CountAvailable = int32_t (*)(); +using GetAvailable = const char* (*)(int32_t localeIndex); static bool intl_availableLocales(JSContext* cx, CountAvailable countAvailable, @@ -902,6 +905,7 @@ intl_availableLocales(JSContext* cx, CountAvailable countAvailable, return false; #if ENABLE_INTL_API + RootedAtom a(cx); uint32_t count = countAvailable(); RootedValue t(cx, BooleanValue(true)); for (uint32_t i = 0; i < count; i++) { @@ -912,7 +916,7 @@ intl_availableLocales(JSContext* cx, CountAvailable countAvailable, char* p; while ((p = strchr(lang.get(), '_'))) *p = '-'; - RootedAtom a(cx, Atomize(cx, lang.get(), strlen(lang.get()))); + a = Atomize(cx, lang.get(), strlen(lang.get())); if (!a) return false; if (!DefineProperty(cx, locales, a->asPropertyName(), t, nullptr, nullptr, @@ -1211,6 +1215,8 @@ js::intl_availableCollations(JSContext* cx, unsigned argc, Value* vp) if (!DefineElement(cx, collations, index++, NullHandleValue)) return false; + RootedString jscollation(cx); + RootedValue element(cx); for (uint32_t i = 0; i < count; i++) { const char* collation = uenum_next(values, nullptr, &status); if (U_FAILURE(status)) { @@ -1225,21 +1231,11 @@ js::intl_availableCollations(JSContext* cx, unsigned argc, Value* vp) if (equal(collation, "standard") || equal(collation, "search")) continue; - // ICU returns old-style keyword values; map them to BCP 47 equivalents - // (see http://bugs.icu-project.org/trac/ticket/9620). - if (equal(collation, "dictionary")) - collation = "dict"; - else if (equal(collation, "gb2312han")) - collation = "gb2312"; - else if (equal(collation, "phonebook")) - collation = "phonebk"; - else if (equal(collation, "traditional")) - collation = "trad"; - - RootedString jscollation(cx, JS_NewStringCopyZ(cx, collation)); + // ICU returns old-style keyword values; map them to BCP 47 equivalents. + jscollation = JS_NewStringCopyZ(cx, uloc_toUnicodeLocaleType("co", collation)); if (!jscollation) return false; - RootedValue element(cx, StringValue(jscollation)); + element = StringValue(jscollation); if (!DefineElement(cx, collations, index++, element)) return false; } @@ -2678,19 +2674,16 @@ js::intl_DateTimeFormat_availableLocales(JSContext* cx, unsigned argc, Value* vp return true; } -// ICU returns old-style keyword values; map them to BCP 47 equivalents -// (see http://bugs.icu-project.org/trac/ticket/9620). -static const char* -bcp47CalendarName(const char* icuName) +struct CalendarAlias { - if (equal(icuName, "ethiopic-amete-alem")) - return "ethioaa"; - if (equal(icuName, "gregorian")) - return "gregory"; - if (equal(icuName, "islamic-civil")) - return "islamicc"; - return icuName; -} + const char* const calendar; + const char* const alias; +}; + +const CalendarAlias calendarAliases[] = { + { "islamic-civil", "islamicc" }, + { "ethioaa", "ethiopic-amete-alem" } +}; bool js::intl_availableCalendars(JSContext* cx, unsigned argc, Value* vp) @@ -2723,7 +2716,8 @@ js::intl_availableCalendars(JSContext* cx, unsigned argc, Value* vp) return false; } - jscalendar = JS_NewStringCopyZ(cx, bcp47CalendarName(calendar)); + // ICU returns old-style keyword values; map them to BCP 47 equivalents + jscalendar = JS_NewStringCopyZ(cx, uloc_toUnicodeLocaleType("ca", calendar)); if (!jscalendar) return false; } @@ -2753,12 +2747,27 @@ js::intl_availableCalendars(JSContext* cx, unsigned argc, Value* vp) return false; } - jscalendar = JS_NewStringCopyZ(cx, bcp47CalendarName(calendar)); + // ICU returns old-style keyword values; map them to BCP 47 equivalents + calendar = uloc_toUnicodeLocaleType("ca", calendar); + + jscalendar = JS_NewStringCopyZ(cx, calendar); if (!jscalendar) return false; element = StringValue(jscalendar); if (!DefineElement(cx, calendars, index++, element)) return false; + + // ICU doesn't return calendar aliases, append them here. + for (const auto& calendarAlias : calendarAliases) { + if (equal(calendar, calendarAlias.calendar)) { + jscalendar = JS_NewStringCopyZ(cx, calendarAlias.alias); + if (!jscalendar) + return false; + element = StringValue(jscalendar); + if (!DefineElement(cx, calendars, index++, element)) + return false; + } + } } args.rval().setObject(*calendars); diff --git a/js/src/builtin/Intl.js b/js/src/builtin/Intl.js index 5276f4ce0f31..ae96c077673d 100644 --- a/js/src/builtin/Intl.js +++ b/js/src/builtin/Intl.js @@ -382,7 +382,7 @@ function CanonicalizeLanguageTag(locale) { if (hasOwn(locale, langTagMappings)) return langTagMappings[locale]; - var subtags = StringSplitString(ToString(locale), "-"); + var subtags = StringSplitString(locale, "-"); var i = 0; // Handle the standard part: All subtags before the first singleton or "x". @@ -930,10 +930,7 @@ function LookupMatcher(availableLocales, requestedLocales) { if (locale !== noExtensionsLocale) { var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE(); var extensionMatch = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, locale); - var extension = extensionMatch[0]; - var extensionIndex = extensionMatch.index; - result.extension = extension; - result.extensionIndex = extensionIndex; + result.extension = extensionMatch[0]; } } else { result.locale = DefaultLocale(); @@ -957,6 +954,79 @@ function BestFitMatcher(availableLocales, requestedLocales) { } +/** + * Returns the Unicode extension value subtags for the requested key subtag. + * + * NOTE: PR to add UnicodeExtensionValue to ECMA-402 isn't yet written. + */ +function UnicodeExtensionValue(extension, key) { + assert(typeof extension === "string", "extension is a string value"); + assert(function() { + var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE(); + var extensionMatch = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, extension); + return extensionMatch !== null && extensionMatch[0] === extension; + }(), "extension is a Unicode extension subtag"); + assert(typeof key === "string", "key is a string value"); + assert(key.length === 2, "key is a Unicode extension key subtag"); + + // Step 1. + var size = extension.length; + + // Step 2. + var searchValue = "-" + key + "-"; + + // Step 3. + var pos = callFunction(std_String_indexOf, extension, searchValue); + + // Step 4. + if (pos !== -1) { + // Step 4.a. + var start = pos + 4; + + // Step 4.b. + var end = start; + + // Step 4.c. + var k = start; + + // Steps 4.d-e. + while (true) { + // Step 4.e.i. + var e = callFunction(std_String_indexOf, extension, "-", k); + + // Step 4.e.ii. + var len = e === -1 ? size - k : e - k; + + // Step 4.e.iii. + if (len === 2) + break; + + // Step 4.e.iv. + if (e === -1) { + end = size; + break; + } + + // Step 4.e.v. + end = e; + k = e + 1; + } + + // Step 4.f. + return callFunction(String_substring, extension, start, end); + } + + // Step 5. + searchValue = "-" + key; + + // Steps 6-7. + if (callFunction(std_String_endsWith, extension, searchValue)) + return ""; + + // Step 8 (implicit). +} + + /** * Compares a BCP 47 language priority list against availableLocales and * determines the best available language to meet the request. Options specified @@ -978,19 +1048,8 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte // Step 4. var foundLocale = r.locale; - // Step 5.a. + // Step 5 (Not applicable in this implementation). var extension = r.extension; - var extensionIndex, extensionSubtags, extensionSubtagsLength; - - // Step 5. - if (extension !== undefined) { - // Step 5.b. - extensionIndex = r.extensionIndex; - - // Steps 5.d-e. - extensionSubtags = StringSplitString(ToString(extension), "-"); - extensionSubtagsLength = extensionSubtags.length; - } // Steps 6-7. var result = new Record(); @@ -999,68 +1058,57 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte // Step 8. var supportedExtension = "-u"; - // Steps 9-11. + // Steps 9-12. var i = 0; var len = relevantExtensionKeys.length; var foundLocaleData; if (len > 0) { // In this implementation, localeData is a function, not an object. - // Step 11.b. + // Step 12.b. foundLocaleData = localeData(foundLocale); } while (i < len) { - // Step 11.a. + // Step 12.a. var key = relevantExtensionKeys[i]; - // Step 11.c. + // Step 12.c. var keyLocaleData = foundLocaleData[key]; // Locale data provides default value. - // Step 11.d. + // Step 12.d. var value = keyLocaleData[0]; + assert(typeof value === "string" || value === null, "unexpected locale data value"); // Locale tag may override. - // Step 11.e. + // Step 12.e. var supportedExtensionAddition = ""; - // Step 11.f is implemented by Utilities.js. + // Step 12.f. + if (extension !== undefined) { + // NB: The step annotations don't yet match the ES2017 Intl draft, + // 94045d234762ad107a3d09bb6f7381a65f1a2f9b, because the PR to add + // the new UnicodeExtensionValue abstract operation still needs to + // be written. - var valuePos; + // Step 12.f.i. + var requestedValue = UnicodeExtensionValue(extension, key); - // Step 11.g. - if (extensionSubtags !== undefined) { - // Step 11.g.i. - var keyPos = callFunction(ArrayIndexOf, extensionSubtags, key); - - // Step 11.g.ii. - if (keyPos !== -1) { - // Step 11.g.ii.1. - if (keyPos + 1 < extensionSubtagsLength && - extensionSubtags[keyPos + 1].length > 2) - { - // Step 11.g.ii.1.a. - var requestedValue = extensionSubtags[keyPos + 1]; - - // Step 11.g.ii.1.b. - valuePos = callFunction(ArrayIndexOf, keyLocaleData, requestedValue); - - // Step 11.g.ii.1.c. - if (valuePos !== -1) { + // Step 12.f.ii. + if (requestedValue !== undefined) { + // Step 12.f.ii.1. + if (requestedValue !== "") { + // Step 12.f.ii.1.a. + if (callFunction(ArrayIndexOf, keyLocaleData, requestedValue) !== -1) { value = requestedValue; supportedExtensionAddition = "-" + key + "-" + value; } } else { - // Step 11.g.ii.2. + // Step 12.f.ii.2. // According to the LDML spec, if there's no type value, // and true is an allowed value, it's used. - - // Step 11.g.ii.2.a. - valuePos = callFunction(ArrayIndexOf, keyLocaleData, "true"); - - // Step 11.g.ii.2.b. - if (valuePos !== -1) + if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1) value = "true"; } } @@ -1068,35 +1116,53 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte // Options override all. - // Step 11.h.i. + // Step 12.g.i. var optionsValue = options[key]; - // Step 11.h, 11.h.ii. + // Step 12.g, 12.g.ii. if (optionsValue !== undefined && + optionsValue !== value && callFunction(ArrayIndexOf, keyLocaleData, optionsValue) !== -1) { - // Step 11.h.ii.1. - if (optionsValue !== value) { - value = optionsValue; - supportedExtensionAddition = ""; - } + value = optionsValue; + supportedExtensionAddition = ""; } - // Steps 11.i-k. + // Steps 12.h-j. result[key] = value; supportedExtension += supportedExtensionAddition; i++; } - // Step 12. + // Step 13. if (supportedExtension.length > 2) { - var preExtension = callFunction(String_substring, foundLocale, 0, extensionIndex); - var postExtension = callFunction(String_substring, foundLocale, extensionIndex); - foundLocale = preExtension + supportedExtension + postExtension; + assert(!callFunction(std_String_startsWith, foundLocale, "x-"), + "unexpected privateuse-only locale returned from ICU"); + + // Step 13.a. + var privateIndex = callFunction(std_String_indexOf, foundLocale, "-x-"); + + // Steps 13.b-c. + if (privateIndex === -1) { + foundLocale += supportedExtension; + } else { + var preExtension = callFunction(String_substring, foundLocale, 0, privateIndex); + var postExtension = callFunction(String_substring, foundLocale, privateIndex); + foundLocale = preExtension + supportedExtension + postExtension; + } + + // Step 13.d. + assert(IsStructurallyValidLanguageTag(foundLocale), "invalid locale after concatenation"); + + // Step 13.e (Not required in this implementation, because we don't + // canonicalize Unicode extension subtags). + assert(foundLocale === CanonicalizeLanguageTag(foundLocale), "same locale with extension"); } - // Steps 13-14. + // Step 14. result.locale = foundLocale; + + // Step 15. return result; } diff --git a/js/src/tests/Intl/DateTimeFormat/calendar-aliases.js b/js/src/tests/Intl/DateTimeFormat/calendar-aliases.js new file mode 100644 index 000000000000..901adcb45f4e --- /dev/null +++ b/js/src/tests/Intl/DateTimeFormat/calendar-aliases.js @@ -0,0 +1,35 @@ +// |reftest| skip-if(!this.hasOwnProperty("Intl")) + +// Ensure ethiopic-amete-alem is resolved to ethioaa instead of ethiopic. +function testEthiopicAmeteAlem() { + var locale = "am-ET-u-nu-latn"; + var opts = {timeZone: "Africa/Addis_Ababa"}; + var dtfEthiopicAmeteAlem = new Intl.DateTimeFormat(`${locale}-ca-ethiopic-amete-alem`, opts); + var dtfEthioaa = new Intl.DateTimeFormat(`${locale}-ca-ethioaa`, opts); + var dtfEthiopic = new Intl.DateTimeFormat(`${locale}-ca-ethiopic`, opts); + + var date = new Date(2016, 1 - 1, 1); + + assertEq(dtfEthiopicAmeteAlem.format(date), dtfEthioaa.format(date)); + assertEq(dtfEthiopicAmeteAlem.format(date) === dtfEthiopic.format(date), false); +} + +// Ensure islamicc is resolved to islamic-civil. +function testIslamicCivil() { + var locale = "ar-SA-u-nu-latn"; + var opts = {timeZone: "Asia/Riyadh"}; + var dtfIslamicCivil = new Intl.DateTimeFormat(`${locale}-ca-islamic-civil`, opts); + var dtfIslamicc = new Intl.DateTimeFormat(`${locale}-ca-islamicc`, opts); + var dtfIslamic = new Intl.DateTimeFormat(`${locale}-ca-islamic`, opts); + + var date = new Date(2016, 1 - 1, 1); + + assertEq(dtfIslamicCivil.format(date), dtfIslamicc.format(date)); + assertEq(dtfIslamicCivil.format(date) === dtfIslamic.format(date), false); +} + +testEthiopicAmeteAlem(); +testIslamicCivil(); + +if (typeof reportCompare === "function") + reportCompare(0, 0, "ok"); diff --git a/js/src/tests/Intl/DateTimeFormat/islamic.js b/js/src/tests/Intl/DateTimeFormat/islamic.js new file mode 100644 index 000000000000..3a88590ea5e5 --- /dev/null +++ b/js/src/tests/Intl/DateTimeFormat/islamic.js @@ -0,0 +1,89 @@ +// |reftest| skip-if(!this.hasOwnProperty("Intl")) + +function civilDate(options, date) { + var opts = Object.assign({timeZone: "Asia/Riyadh"}, options); + return new Intl.DateTimeFormat("ar-SA-u-ca-islamic-civil-nu-latn", opts).format(date); +} + +function tabularDate(options, date) { + var opts = Object.assign({timeZone: "Asia/Riyadh"}, options); + return new Intl.DateTimeFormat("ar-SA-u-ca-islamic-tbla-nu-latn", opts).format(date); +} + +function sightingDate(options, date) { + var opts = Object.assign({timeZone: "Asia/Riyadh"}, options); + return new Intl.DateTimeFormat("ar-SA-u-ca-islamic-rgsa-nu-latn", opts).format(date); +} + +function ummAlQuraDate(options, date) { + var opts = Object.assign({timeZone: "Asia/Riyadh"}, options); + return new Intl.DateTimeFormat("ar-SA-u-ca-umalqura-nu-latn", opts).format(date); +} + +// Test islamic-tbla (Tabular / Thursday epoch). +// Compare with islamic-civil (Tabular / Friday epoch). +function testIslamicTbla() { + var date = new Date(Date.UTC(2015, 1 - 1, 1)); + + // Month and year are the same. + var monthYear = {year: "numeric", month: "numeric"}; + assertEq(civilDate(monthYear, date), tabularDate(monthYear, date)); + + // Day is different by one. + var day = {day: "numeric"}; + assertEq(Number(civilDate(day, date)) - Number(tabularDate(day, date)), -1); +} + +// Test islamic-rgsa (Saudi Arabia sighting). +// Sighting of the hilal (crescent moon) in Saudi Arabia. +function testIslamicRgsa() { + var date1 = new Date(Date.UTC(1975, 5 - 1, 6)); + var date2 = new Date(Date.UTC(2015, 1 - 1, 1)); + var dayMonthYear = {year: "numeric", month: "numeric", day: "numeric"}; + + assertEq(sightingDate(dayMonthYear, date1), tabularDate(dayMonthYear, date1)); + assertEq(sightingDate(dayMonthYear, date2), civilDate(dayMonthYear, date2)); +} + +// Test islamic-umalqura (Umm al-Qura). +function testIslamicUmalqura() { + var year = {year: "numeric"}; + var month = {month: "numeric"}; + var day = {day: "numeric"}; + + // From ICU test files, which in turn was generated from: + // Official Umm-al-Qura calendar of SA: + // home, http://www.ummulqura.org.sa/default.aspx + // converter, http://www.ummulqura.org.sa/Index.aspx + var dates = [ + [ {year: 2016, month: 1, day: 11}, {year: 1437, month: 4, day: 1} ], + [ {year: 2016, month: 2, day: 10}, {year: 1437, month: 5, day: 1} ], + [ {year: 2016, month: 3, day: 10}, {year: 1437, month: 6, day: 1} ], + [ {year: 2016, month: 4, day: 8}, {year: 1437, month: 7, day: 1} ], + [ {year: 2016, month: 5, day: 8}, {year: 1437, month: 8, day: 1} ], + [ {year: 2016, month: 6, day: 6}, {year: 1437, month: 9, day: 1} ], + [ {year: 2016, month: 7, day: 6}, {year: 1437, month: 10, day: 1} ], + [ {year: 2016, month: 8, day: 4}, {year: 1437, month: 11, day: 1} ], + [ {year: 2016, month: 9, day: 2}, {year: 1437, month: 12, day: 1} ], + [ {year: 2016, month: 10, day: 2}, {year: 1438, month: 1, day: 1} ], + [ {year: 2016, month: 11, day: 1}, {year: 1438, month: 2, day: 1} ], + [ {year: 2016, month: 11, day: 30}, {year: 1438, month: 3, day: 1} ], + [ {year: 2016, month: 12, day: 30}, {year: 1438, month: 4, day: 1} ], + ]; + + for (var [gregorian, ummAlQura] of dates) { + var date = new Date(Date.UTC(gregorian.year, gregorian.month - 1, gregorian.day)); + + // Use parseInt() to remove the trailing era indicator. + assertEq(parseInt(ummAlQuraDate(year, date), 10), ummAlQura.year); + assertEq(Number(ummAlQuraDate(month, date)), ummAlQura.month); + assertEq(Number(ummAlQuraDate(day, date)), ummAlQura.day); + } +} + +testIslamicTbla(); +testIslamicRgsa(); +testIslamicUmalqura(); + +if (typeof reportCompare === "function") + reportCompare(0, 0, "ok");