From 39cadacc6729896f48447e722a0b4342bd8a183c Mon Sep 17 00:00:00 2001 From: Yoshi Cheng-Hao Huang Date: Thu, 9 Sep 2021 12:02:21 +0000 Subject: [PATCH] Bug 1719747 - Part 1: Unify ListFormat in SM. r=gregtatum,anba,platform-i18n-reviewers,tcampbell Move implementations to mozilla::intl::ListFormat Differential Revision: https://phabricator.services.mozilla.com/D122334 --- intl/components/gtest/TestListFormat.cpp | 140 +++++++++++ intl/components/gtest/moz.build | 1 + intl/components/moz.build | 2 + intl/components/src/ICU4CGlue.h | 2 + intl/components/src/ICUError.h | 1 + intl/components/src/ListFormat.cpp | 165 +++++++++++++ intl/components/src/ListFormat.h | 171 ++++++++++++++ js/src/builtin/intl/CommonFunctions.cpp | 3 + js/src/builtin/intl/ListFormat.cpp | 285 ++++++----------------- js/src/builtin/intl/ListFormat.h | 17 +- 10 files changed, 565 insertions(+), 222 deletions(-) create mode 100644 intl/components/gtest/TestListFormat.cpp create mode 100644 intl/components/src/ListFormat.cpp create mode 100644 intl/components/src/ListFormat.h diff --git a/intl/components/gtest/TestListFormat.cpp b/intl/components/gtest/TestListFormat.cpp new file mode 100644 index 000000000000..5b52ecfc8419 --- /dev/null +++ b/intl/components/gtest/TestListFormat.cpp @@ -0,0 +1,140 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "gtest/gtest.h" + +#include "mozilla/intl/ListFormat.h" +#include "mozilla/Span.h" +#include "TestBuffer.h" + +namespace mozilla::intl { + +// Test ListFormat.format with default options. 
+TEST(IntlListFormat, FormatDefault) +{ + ListFormat::Options options; + UniquePtr lf = ListFormat::TryCreate("en-US", options).unwrap(); + ListFormat::StringList list; + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Alice"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Bob"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Charlie"))); + TestBuffer buf16; + ASSERT_TRUE(lf->Format(list, buf16).isOk()); + ASSERT_EQ(buf16.get_string_view(), u"Alice, Bob, and Charlie"); + + UniquePtr lfDe = ListFormat::TryCreate("de", options).unwrap(); + ASSERT_TRUE(lfDe->Format(list, buf16).isOk()); + ASSERT_EQ(buf16.get_string_view(), u"Alice, Bob und Charlie"); +} + +// Test ListFormat.format with Type::Conjunction and other styles. +TEST(IntlListFormat, FormatConjunction) +{ + ListFormat::Options options{ListFormat::Type::Conjunction, + ListFormat::Style::Narrow}; + UniquePtr lf = ListFormat::TryCreate("en-US", options).unwrap(); + ListFormat::StringList list; + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Alice"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Bob"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Charlie"))); + TestBuffer buf16; + ASSERT_TRUE(lf->Format(list, buf16).isOk()); + ASSERT_EQ(buf16.get_string_view(), u"Alice, Bob, Charlie"); + + ListFormat::Options optionsSh{ListFormat::Type::Conjunction, + ListFormat::Style::Short}; + UniquePtr lfSh = + ListFormat::TryCreate("en-US", optionsSh).unwrap(); + ASSERT_TRUE(lfSh->Format(list, buf16).isOk()); + ASSERT_EQ(buf16.get_string_view(), u"Alice, Bob, & Charlie"); +} + +// Test ListFormat.format with Type::Disjunction. +TEST(IntlListFormat, FormatDisjunction) +{ + // When Type is Disjunction, the results will be the same regardless of the + // style for most locales, so simply test with Style::Long. 
+ ListFormat::Options options{ListFormat::Type::Disjunction, + ListFormat::Style::Long}; + UniquePtr lf = ListFormat::TryCreate("en-US", options).unwrap(); + ListFormat::StringList list; + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Alice"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Bob"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Charlie"))); + TestBuffer buf16; + ASSERT_TRUE(lf->Format(list, buf16).isOk()); + ASSERT_EQ(buf16.get_string_view(), u"Alice, Bob, or Charlie"); +} + +// Test ListFormat.format with Type::Unit. +TEST(IntlListFormat, FormatUnit) +{ + ListFormat::Options options{ListFormat::Type::Unit, ListFormat::Style::Long}; + // For locale "en", Style::Long and Style::Short have the same result, so just + // test Style::Long here. + UniquePtr lf = ListFormat::TryCreate("en-US", options).unwrap(); + ListFormat::StringList list; + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Alice"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Bob"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Charlie"))); + TestBuffer buf16; + ASSERT_TRUE(lf->Format(list, buf16).isOk()); + ASSERT_EQ(buf16.get_string_view(), u"Alice, Bob, Charlie"); + + ListFormat::Options optionsNa{ListFormat::Type::Unit, + ListFormat::Style::Narrow}; + UniquePtr lfNa = + ListFormat::TryCreate("en-US", optionsNa).unwrap(); + ASSERT_TRUE(lfNa->Format(list, buf16).isOk()); + ASSERT_EQ(buf16.get_string_view(), u"Alice Bob Charlie"); +} + +// Pass a long list (list.length() > DEFAULT_LIST_LENGTH) and check the result +// is still correct. 
(result.length > INITIAL_CHAR_BUFFER_SIZE) +TEST(IntlListFormat, FormatBufferLength) +{ + ListFormat::Options options; + UniquePtr lf = ListFormat::TryCreate("en-US", options).unwrap(); + ListFormat::StringList list; + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Alice"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Bob"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Charlie"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"David"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Eve"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Frank"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Grace"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Heidi"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Ivan"))); + TestBuffer buf16; + ASSERT_TRUE(lf->Format(list, buf16).isOk()); + ASSERT_EQ(buf16.get_string_view(), + u"Alice, Bob, Charlie, David, Eve, Frank, Grace, Heidi, and Ivan"); +} + +TEST(IntlListFormat, FormatToParts) +{ + ListFormat::Options options; + UniquePtr lf = ListFormat::TryCreate("en-US", options).unwrap(); + ListFormat::StringList list; + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Alice"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Bob"))); + MOZ_RELEASE_ASSERT(list.append(MakeStringSpan(u"Charlie"))); + ListFormat::PartVector parts; + ASSERT_TRUE(lf->FormatToParts(list, parts).isOk()); + + // 3 elements, and 2 literals. 
+ ASSERT_EQ(parts.length(), 5u); + + ASSERT_EQ(parts[0], (ListFormat::Part{ListFormat::PartType::Element, + MakeStringSpan(u"Alice")})); + ASSERT_EQ(parts[1], (ListFormat::Part{ListFormat::PartType::Literal, + MakeStringSpan(u", ")})); + ASSERT_EQ(parts[2], (ListFormat::Part{ListFormat::PartType::Element, + MakeStringSpan(u"Bob")})); + ASSERT_EQ(parts[3], (ListFormat::Part{ListFormat::PartType::Literal, + MakeStringSpan(u", and ")})); + ASSERT_EQ(parts[4], (ListFormat::Part{ListFormat::PartType::Element, + MakeStringSpan(u"Charlie")})); +} + +} // namespace mozilla::intl diff --git a/intl/components/gtest/moz.build b/intl/components/gtest/moz.build index 9a18d1a176d8..ba6154d4b194 100644 --- a/intl/components/gtest/moz.build +++ b/intl/components/gtest/moz.build @@ -8,6 +8,7 @@ UNIFIED_SOURCES += [ "TestCalendar.cpp", "TestCollator.cpp", "TestDateTimeFormat.cpp", + "TestListFormat.cpp", "TestLocaleCanonicalizer.cpp", "TestNumberFormat.cpp", "TestPluralRules.cpp", diff --git a/intl/components/moz.build b/intl/components/moz.build index dda0bf2d6a63..362f8f9580f1 100644 --- a/intl/components/moz.build +++ b/intl/components/moz.build @@ -10,6 +10,7 @@ EXPORTS.mozilla.intl = [ "src/DateTimePatternGenerator.h", "src/ICU4CGlue.h", "src/ICUError.h", + "src/ListFormat.h", "src/LocaleCanonicalizer.h", "src/NumberFormat.h", "src/NumberFormatFields.h", @@ -24,6 +25,7 @@ UNIFIED_SOURCES += [ "src/DateTimeFormat.cpp", "src/DateTimePatternGenerator.cpp", "src/ICU4CGlue.cpp", + "src/ListFormat.cpp", "src/LocaleCanonicalizer.cpp", "src/NumberFormat.cpp", "src/NumberFormatFields.cpp", diff --git a/intl/components/src/ICU4CGlue.h b/intl/components/src/ICU4CGlue.h index 2476c341f00c..fe7d88933fa7 100644 --- a/intl/components/src/ICU4CGlue.h +++ b/intl/components/src/ICU4CGlue.h @@ -64,6 +64,8 @@ class ICUPointer { const T* GetConst() const { return const_cast(mPointer); } T* GetMut() { return mPointer; } + explicit operator bool() const { return !!mPointer; } + private: T* mPointer; 
}; diff --git a/intl/components/src/ICUError.h b/intl/components/src/ICUError.h index cbb4ed6856eb..9d6ca9d85e87 100644 --- a/intl/components/src/ICUError.h +++ b/intl/components/src/ICUError.h @@ -15,6 +15,7 @@ namespace mozilla::intl { enum class ICUError : uint8_t { OutOfMemory, InternalError, + OverflowError, }; /** diff --git a/intl/components/src/ListFormat.cpp b/intl/components/src/ListFormat.cpp new file mode 100644 index 000000000000..390821bfdd16 --- /dev/null +++ b/intl/components/src/ListFormat.cpp @@ -0,0 +1,165 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "mozilla/intl/ListFormat.h" + +namespace mozilla::intl { + +/*static*/ Result, ICUError> ListFormat::TryCreate( + mozilla::Span aLocale, const Options& aOptions) { + UListFormatterType utype = ToUListFormatterType(aOptions.mType); + UListFormatterWidth uwidth = ToUListFormatterWidth(aOptions.mStyle); + + UErrorCode status = U_ZERO_ERROR; + UListFormatter* fmt = + ulistfmt_openForType(aLocale.data(), utype, uwidth, &status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + + UFormattedList* fl = ulistfmt_openResult(&status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + + return UniquePtr(new ListFormat(fmt, fl)); +} + +ListFormat::~ListFormat() { + if (mListFormatter) { + ulistfmt_close(mListFormatter.GetMut()); + } + + if (mFormattedList) { + ulistfmt_closeResult(mFormattedList.GetMut()); + } +} + +/* static */ UListFormatterType ListFormat::ToUListFormatterType(Type type) { + switch (type) { + case Type::Conjunction: + return ULISTFMT_TYPE_AND; + case Type::Disjunction: + return ULISTFMT_TYPE_OR; + case Type::Unit: + return ULISTFMT_TYPE_UNITS; + } + MOZ_ASSERT_UNREACHABLE(); + return ULISTFMT_TYPE_AND; +} + +/* static */ UListFormatterWidth ListFormat::ToUListFormatterWidth( + 
Style style) { + switch (style) { + case Style::Long: + return ULISTFMT_WIDTH_WIDE; + case Style::Short: + return ULISTFMT_WIDTH_SHORT; + case Style::Narrow: + return ULISTFMT_WIDTH_NARROW; + } + MOZ_ASSERT_UNREACHABLE(); + return ULISTFMT_WIDTH_WIDE; +} + +ICUResult ListFormat::FormatToParts(const StringList& list, PartVector& parts) { + UErrorCode status = U_ZERO_ERROR; + + mozilla::Vector u16strings; + mozilla::Vector u16stringLens; + MOZ_TRY(ConvertStringListToVectors(list, u16strings, u16stringLens)); + + ulistfmt_formatStringsToResult(mListFormatter.GetConst(), u16strings.begin(), + u16stringLens.begin(), int32_t(list.length()), + mFormattedList.GetMut(), &status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + + const UFormattedValue* formattedValue = + ulistfmt_resultAsValue(mFormattedList.GetConst(), &status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + + int32_t formattedCharsLen; + const char16_t* formattedChars = + ufmtval_getString(formattedValue, &formattedCharsLen, &status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + + size_t formattedSize = AssertedCast(formattedCharsLen); + mozilla::Span formattedSpan{formattedChars, formattedSize}; + size_t lastEndIndex = 0; + + auto AppendPart = [&](PartType type, size_t beginIndex, size_t endIndex) { + if (!parts.emplaceBack(type, formattedSpan.FromTo(beginIndex, endIndex))) { + return false; + } + + lastEndIndex = endIndex; + return true; + }; + + UConstrainedFieldPosition* fpos = ucfpos_open(&status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + ScopedICUObject toCloseFpos(fpos); + + // We're only interested in ULISTFMT_ELEMENT_FIELD fields. 
+ ucfpos_constrainField(fpos, UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, + &status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + + while (true) { + bool hasMore = ufmtval_nextPosition(formattedValue, fpos, &status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + if (!hasMore) { + break; + } + + int32_t beginIndexInt, endIndexInt; + ucfpos_getIndexes(fpos, &beginIndexInt, &endIndexInt, &status); + if (U_FAILURE(status)) { + return Err(ICUError::InternalError); + } + + MOZ_ASSERT(beginIndexInt <= endIndexInt, + "field iterator returning invalid range"); + + size_t beginIndex = AssertedCast(beginIndexInt); + size_t endIndex = AssertedCast(endIndexInt); + + // Indices are guaranteed to be returned in order (from left to right). + MOZ_ASSERT(lastEndIndex <= beginIndex, + "field iteration didn't return fields in order start to " + "finish as expected"); + + if (lastEndIndex < beginIndex) { + if (!AppendPart(PartType::Literal, lastEndIndex, beginIndex)) { + return Err(ICUError::InternalError); + } + } + + if (!AppendPart(PartType::Element, beginIndex, endIndex)) { + return Err(ICUError::InternalError); + } + } + + // Append any final literal. + if (lastEndIndex < formattedSize) { + if (!AppendPart(PartType::Literal, lastEndIndex, formattedSize)) { + return Err(ICUError::InternalError); + } + } + + return Ok(); +} + +} // namespace mozilla::intl diff --git a/intl/components/src/ListFormat.h b/intl/components/src/ListFormat.h new file mode 100644 index 000000000000..4fe4341d12a5 --- /dev/null +++ b/intl/components/src/ListFormat.h @@ -0,0 +1,171 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#ifndef intl_components_ListFormat_h_ +#define intl_components_ListFormat_h_ + +#include "mozilla/CheckedInt.h" +#include "mozilla/intl/ICU4CGlue.h" +#include "mozilla/Result.h" +#include "mozilla/ResultVariant.h" +#include "mozilla/Vector.h" +#include "unicode/ulistformatter.h" + +struct UListFormatter; +struct UFormattedList; + +namespace mozilla::intl { + +static constexpr size_t DEFAULT_LIST_LENGTH = 8; + +/** + * This component is a Mozilla-focused API for the list formatting provided by + * ICU. It implements the API provided by the ECMA-402 Intl.ListFormat object. + * + * https://tc39.es/ecma402/#listformat-objects + */ +class ListFormat final { + public: + /** + * The [[Type]] and [[Style]] properties of ListFormat instances. + * + * https://tc39.es/ecma402/#sec-properties-of-intl-listformat-instances + */ + // [[Type]] + enum class Type { Conjunction, Disjunction, Unit }; + // [[Style]] + enum class Style { Long, Short, Narrow }; + + /** + * The 'options' object to create Intl.ListFormat instance. + * + * https://tc39.es/ecma402/#sec-Intl.ListFormat + */ + struct Options { + // "conjunction" is the default fallback value. + Type mType = Type::Conjunction; + + // "long" is the default fallback value. + Style mStyle = Style::Long; + }; + + /** + * Create a ListFormat object for the provided locale and options. + * + * https://tc39.es/ecma402/#sec-Intl.ListFormat + */ + static Result, ICUError> TryCreate( + mozilla::Span aLocale, const Options& aOptions); + + ~ListFormat(); + + /** + * The list of String values for FormatList and FormatListToParts. + * + * https://tc39.es/ecma402/#sec-formatlist + * https://tc39.es/ecma402/#sec-formatlisttoparts + */ + using StringList = + mozilla::Vector, DEFAULT_LIST_LENGTH>; + + /** + * Format the list according to the options and write the result into buffer.
+ * + * https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.format + * https://tc39.es/ecma402/#sec-formatlist + */ + template + ICUResult Format(const StringList& list, Buffer& buffer) const { + static_assert(std::is_same_v, + "Currently only UTF-16 buffers are supported."); + + mozilla::Vector u16strings; + mozilla::Vector u16stringLens; + MOZ_TRY(ConvertStringListToVectors(list, u16strings, u16stringLens)); + + int32_t u16stringCount = mozilla::AssertedCast(list.length()); + MOZ_TRY(FillBufferWithICUCall( + buffer, [this, &u16strings, &u16stringLens, u16stringCount]( + char16_t* chars, int32_t size, UErrorCode* status) { + return ulistfmt_format(mListFormatter.GetConst(), u16strings.begin(), + u16stringLens.begin(), u16stringCount, chars, + size, status); + })); + + return Ok{}; + } + + /** + * The corresponding list of parts according to the effective locale and the + * formatting options of ListFormat. + * Each part has a [[Type]] field, which must be "element" or "literal". + * + * https://tc39.es/ecma402/#sec-createpartsfromlist + */ + enum class PartType { + Element, + Literal, + }; + using Part = std::pair>; + using PartVector = mozilla::Vector; + + /** + * Format the list to a list of parts, and write the result into parts. + * The PartVector contains mozilla::Span which point to memory which may be + * overwritten when the next format method is called.
+ * + * https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.formatToParts + * https://tc39.es/ecma402/#sec-formatlisttoparts + */ + ICUResult FormatToParts(const StringList& list, PartVector& parts); + + private: + ListFormat() = delete; + ListFormat(UListFormatter* fmt, UFormattedList* fl) + : mListFormatter(fmt), mFormattedList(fl) {} + ListFormat(const ListFormat&) = delete; + ListFormat& operator=(const ListFormat&) = delete; + + ICUPointer mListFormatter = + ICUPointer(nullptr); + ICUPointer mFormattedList = + ICUPointer(nullptr); + + // Convert StringList to an array of type 'const char16_t*' and an array of + // int32 for ICU-API. + ICUResult ConvertStringListToVectors( + const StringList& list, + mozilla::Vector& u16strings, + mozilla::Vector& u16stringLens) const { + // Keep a conservative running count of overall length. + mozilla::CheckedInt stringLengthTotal(0); + for (const auto& string : list) { + if (!u16strings.append(string.data())) { + return Err(ICUError::InternalError); + } + + int32_t len = mozilla::AssertedCast(string.size()); + if (!u16stringLens.append(len)) { + return Err(ICUError::InternalError); + } + + stringLengthTotal += len; + } + + // Add space for N unrealistically large conjunctions. + constexpr int32_t MaxConjunctionLen = 100; + stringLengthTotal += CheckedInt(list.length()) * MaxConjunctionLen; + // If the overestimate exceeds ICU length limits, don't try to format. 
+ if (!stringLengthTotal.isValid()) { + return Err(ICUError::OverflowError); + } + + return Ok{}; + } + + static UListFormatterType ToUListFormatterType(Type type); + static UListFormatterWidth ToUListFormatterWidth(Style style); +}; + +} // namespace mozilla::intl +#endif // intl_components_ListFormat_h_ diff --git a/js/src/builtin/intl/CommonFunctions.cpp b/js/src/builtin/intl/CommonFunctions.cpp index 58221357c47c..a094840d76b9 100644 --- a/js/src/builtin/intl/CommonFunctions.cpp +++ b/js/src/builtin/intl/CommonFunctions.cpp @@ -102,6 +102,9 @@ void js::intl::ReportInternalError(JSContext* cx, case mozilla::intl::ICUError::InternalError: ReportInternalError(cx); return; + case mozilla::intl::ICUError::OverflowError: + ReportAllocationOverflow(cx); + return; } MOZ_CRASH("Unexpected ICU error"); } diff --git a/js/src/builtin/intl/ListFormat.cpp b/js/src/builtin/intl/ListFormat.cpp index 694698059c70..e0f82136a6a5 100644 --- a/js/src/builtin/intl/ListFormat.cpp +++ b/js/src/builtin/intl/ListFormat.cpp @@ -7,21 +7,18 @@ #include "builtin/intl/ListFormat.h" #include "mozilla/Assertions.h" -#include "mozilla/CheckedInt.h" +#include "mozilla/intl/ListFormat.h" #include "mozilla/PodOperations.h" #include -#include #include "builtin/Array.h" #include "builtin/intl/CommonFunctions.h" +#include "builtin/intl/FormatBuffer.h" #include "builtin/intl/ScopedICUObject.h" #include "gc/FreeOp.h" #include "js/Utility.h" #include "js/Vector.h" -#include "unicode/uformattedvalue.h" -#include "unicode/ulistformatter.h" -#include "unicode/utypes.h" #include "vm/JSContext.h" #include "vm/PlainObject.h" // js::PlainObject #include "vm/Runtime.h" // js::ReportAllocationOverflow @@ -137,19 +134,20 @@ static bool ListFormat(JSContext* cx, unsigned argc, Value* vp) { void js::ListFormatObject::finalize(JSFreeOp* fop, JSObject* obj) { MOZ_ASSERT(fop->onMainThread()); - if (UListFormatter* lf = obj->as().getListFormatter()) { + mozilla::intl::ListFormat* lf = + obj->as().getListFormatSlot(); + 
if (lf) { intl::RemoveICUCellMemory(fop, obj, ListFormatObject::EstimatedMemoryUse); - - ulistfmt_close(lf); + delete lf; } } /** - * Returns a new UListFormatter with the locale and list formatting options + * Returns a new ListFormat with the locale and list formatting options * of the given ListFormat. */ -static UListFormatter* NewUListFormatter(JSContext* cx, - Handle listFormat) { +static mozilla::intl::ListFormat* NewListFormat( + JSContext* cx, Handle listFormat) { RootedObject internals(cx, intl::GetInternalsObject(cx, listFormat)); if (!internals) { return nullptr; @@ -165,9 +163,9 @@ static UListFormatter* NewUListFormatter(JSContext* cx, return nullptr; } - enum class ListFormatType { Conjunction, Disjunction, Unit }; + mozilla::intl::ListFormat::Options options; - ListFormatType type; + using ListFormatType = mozilla::intl::ListFormat::Type; if (!GetProperty(cx, internals, internals, cx->names().type, &value)) { return nullptr; } @@ -178,18 +176,16 @@ static UListFormatter* NewUListFormatter(JSContext* cx, } if (StringEqualsLiteral(strType, "conjunction")) { - type = ListFormatType::Conjunction; + options.mType = ListFormatType::Conjunction; } else if (StringEqualsLiteral(strType, "disjunction")) { - type = ListFormatType::Disjunction; + options.mType = ListFormatType::Disjunction; } else { MOZ_ASSERT(StringEqualsLiteral(strType, "unit")); - type = ListFormatType::Unit; + options.mType = ListFormatType::Unit; } } - enum class ListFormatStyle { Long, Short, Narrow }; - - ListFormatStyle style; + using ListFormatStyle = mozilla::intl::ListFormat::Style; if (!GetProperty(cx, internals, internals, cx->names().style, &value)) { return nullptr; } @@ -200,81 +196,43 @@ static UListFormatter* NewUListFormatter(JSContext* cx, } if (StringEqualsLiteral(strStyle, "long")) { - style = ListFormatStyle::Long; + options.mStyle = ListFormatStyle::Long; } else if (StringEqualsLiteral(strStyle, "short")) { - style = ListFormatStyle::Short; + options.mStyle = 
ListFormatStyle::Short; } else { MOZ_ASSERT(StringEqualsLiteral(strStyle, "narrow")); - style = ListFormatStyle::Narrow; + options.mStyle = ListFormatStyle::Narrow; } } - UListFormatterType utype; - switch (type) { - case ListFormatType::Conjunction: - utype = ULISTFMT_TYPE_AND; - break; - case ListFormatType::Disjunction: - utype = ULISTFMT_TYPE_OR; - break; - case ListFormatType::Unit: - utype = ULISTFMT_TYPE_UNITS; - break; + auto result = mozilla::intl::ListFormat::TryCreate( + mozilla::MakeStringSpan(IcuLocale(locale.get())), options); + + if (result.isOk()) { + return result.unwrap().release(); } - UListFormatterWidth uwidth; - switch (style) { - case ListFormatStyle::Long: - uwidth = ULISTFMT_WIDTH_WIDE; - break; - case ListFormatStyle::Short: - uwidth = ULISTFMT_WIDTH_SHORT; - break; - case ListFormatStyle::Narrow: - uwidth = ULISTFMT_WIDTH_NARROW; - break; - } - - UErrorCode status = U_ZERO_ERROR; - UListFormatter* lf = - ulistfmt_openForType(IcuLocale(locale.get()), utype, uwidth, &status); - if (U_FAILURE(status)) { - intl::ReportInternalError(cx); - return nullptr; - } - return lf; + js::intl::ReportInternalError(cx, result.unwrapErr()); + return nullptr; } -static constexpr size_t DEFAULT_LIST_LENGTH = 8; - -using ListFormatStringVector = Vector; -using ListFormatStringLengthVector = Vector; - -static_assert(sizeof(UniqueTwoByteChars) == sizeof(char16_t*), - "UniqueTwoByteChars are stored efficiently and are held in " - "continuous memory"); - /** * FormatList ( listFormat, list ) */ -static bool FormatList(JSContext* cx, UListFormatter* lf, - const ListFormatStringVector& strings, - const ListFormatStringLengthVector& stringLengths, +static bool FormatList(JSContext* cx, mozilla::intl::ListFormat* lf, + const mozilla::intl::ListFormat::StringList& list, MutableHandleValue result) { - MOZ_ASSERT(strings.length() == stringLengths.length()); - MOZ_ASSERT(strings.length() <= INT32_MAX); - - JSString* str = intl::CallICU(cx, [lf, &strings, &stringLengths]( 
- UChar* chars, int32_t size, - UErrorCode* status) { - return ulistfmt_format( - lf, reinterpret_cast(strings.begin()), - stringLengths.begin(), int32_t(strings.length()), chars, size, status); - }); - if (!str) { + intl::FormatBuffer formatBuffer(cx); + auto formatResult = lf->Format(list, formatBuffer); + if (formatResult.isErr()) { + js::intl::ReportInternalError(cx, formatResult.unwrapErr()); return false; } + JSString* str = formatBuffer.toString(); + if (!str) { + return false; + } result.setString(str); return true; } @@ -282,39 +240,13 @@ static bool FormatList(JSContext* cx, UListFormatter* lf, /** * FormatListToParts ( listFormat, list ) */ -static bool FormatListToParts(JSContext* cx, UListFormatter* lf, - const ListFormatStringVector& strings, - const ListFormatStringLengthVector& stringLengths, +static bool FormatListToParts(JSContext* cx, mozilla::intl::ListFormat* lf, + const mozilla::intl::ListFormat::StringList& list, MutableHandleValue result) { - MOZ_ASSERT(strings.length() == stringLengths.length()); - MOZ_ASSERT(strings.length() <= INT32_MAX); - - UErrorCode status = U_ZERO_ERROR; - UFormattedList* formatted = ulistfmt_openResult(&status); - if (U_FAILURE(status)) { - intl::ReportInternalError(cx); - return false; - } - ScopedICUObject toClose(formatted); - - ulistfmt_formatStringsToResult( - lf, reinterpret_cast(strings.begin()), - stringLengths.begin(), int32_t(strings.length()), formatted, &status); - if (U_FAILURE(status)) { - intl::ReportInternalError(cx); - return false; - } - - const UFormattedValue* formattedValue = - ulistfmt_resultAsValue(formatted, &status); - if (U_FAILURE(status)) { - intl::ReportInternalError(cx); - return false; - } - - RootedString overallResult(cx, - intl::FormattedValueToString(cx, formattedValue)); - if (!overallResult) { + mozilla::intl::ListFormat::PartVector parts; + auto formatResult = lf->FormatToParts(list, parts); + if (formatResult.isErr()) { + js::intl::ReportInternalError(cx, 
formatResult.unwrapErr()); return false; } @@ -323,30 +255,31 @@ static bool FormatListToParts(JSContext* cx, UListFormatter* lf, return false; } - using FieldType = js::ImmutablePropertyNamePtr JSAtomState::*; - - size_t lastEndIndex = 0; RootedObject singlePart(cx); RootedValue val(cx); - auto AppendPart = [&](FieldType type, size_t beginIndex, size_t endIndex) { + for (const mozilla::intl::ListFormat::Part& part : parts) { singlePart = NewPlainObject(cx); if (!singlePart) { return false; } - val = StringValue(cx->names().*type); + if (part.first == mozilla::intl::ListFormat::PartType::Element) { + val = StringValue(cx->names().element); + } else { + val = StringValue(cx->names().literal); + } + if (!DefineDataProperty(cx, singlePart, cx->names().type, val)) { return false; } - JSLinearString* partSubstr = NewDependentString( - cx, overallResult, beginIndex, endIndex - beginIndex); - if (!partSubstr) { + JSString* partStr = + NewStringCopyN(cx, part.second.data(), part.second.size()); + if (!partStr) { return false; } - - val = StringValue(partSubstr); + val = StringValue(partStr); if (!DefineDataProperty(cx, singlePart, cx->names().value, val)) { return false; } @@ -354,73 +287,6 @@ static bool FormatListToParts(JSContext* cx, UListFormatter* lf, if (!NewbornArrayPush(cx, partsArray, ObjectValue(*singlePart))) { return false; } - - lastEndIndex = endIndex; - return true; - }; - - UConstrainedFieldPosition* fpos = ucfpos_open(&status); - if (U_FAILURE(status)) { - intl::ReportInternalError(cx); - return false; - } - ScopedICUObject toCloseFpos(fpos); - - // We're only interested in ULISTFMT_ELEMENT_FIELD fields. 
- ucfpos_constrainField(fpos, UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, - &status); - if (U_FAILURE(status)) { - intl::ReportInternalError(cx); - return false; - } - - while (true) { - bool hasMore = ufmtval_nextPosition(formattedValue, fpos, &status); - if (U_FAILURE(status)) { - intl::ReportInternalError(cx); - return false; - } - if (!hasMore) { - break; - } - - int32_t beginIndexInt, endIndexInt; - ucfpos_getIndexes(fpos, &beginIndexInt, &endIndexInt, &status); - if (U_FAILURE(status)) { - intl::ReportInternalError(cx); - return false; - } - - MOZ_ASSERT(beginIndexInt >= 0); - MOZ_ASSERT(endIndexInt >= 0); - MOZ_ASSERT(beginIndexInt <= endIndexInt, - "field iterator returning invalid range"); - - size_t beginIndex = size_t(beginIndexInt); - size_t endIndex = size_t(endIndexInt); - - // Indices are guaranteed to be returned in order (from left to right). - MOZ_ASSERT(lastEndIndex <= beginIndex, - "field iteration didn't return fields in order start to " - "finish as expected"); - - if (lastEndIndex < beginIndex) { - if (!AppendPart(&JSAtomState::literal, lastEndIndex, beginIndex)) { - return false; - } - } - - if (!AppendPart(&JSAtomState::element, beginIndex, endIndex)) { - return false; - } - } - - // Append any final literal. - if (lastEndIndex < overallResult->length()) { - if (!AppendPart(&JSAtomState::literal, lastEndIndex, - overallResult->length())) { - return false; - } } result.setObject(*partsArray); @@ -436,30 +302,30 @@ bool js::intl_FormatList(JSContext* cx, unsigned argc, Value* vp) { bool formatToParts = args[2].toBoolean(); - // Obtain a cached UListFormatter object. - UListFormatter* lf = listFormat->getListFormatter(); + // Obtain a cached mozilla::intl::ListFormat object. 
+ mozilla::intl::ListFormat* lf = listFormat->getListFormatSlot(); if (!lf) { - lf = NewUListFormatter(cx, listFormat); + lf = NewListFormat(cx, listFormat); if (!lf) { return false; } - listFormat->setListFormatter(lf); + listFormat->setListFormatSlot(lf); intl::AddICUCellMemory(listFormat, ListFormatObject::EstimatedMemoryUse); } // Collect all strings and their lengths. - ListFormatStringVector strings(cx); - ListFormatStringLengthVector stringLengths(cx); + // + // 'strings' takes the ownership of those strings, and 'list' will be passed + // to mozilla::intl::ListFormat as a Span. + Vector strings(cx); + mozilla::intl::ListFormat::StringList list; - // Keep a conservative running count of overall length. - CheckedInt stringLengthTotal(0); - - RootedArrayObject list(cx, &args[1].toObject().as()); + RootedArrayObject listObj(cx, &args[1].toObject().as()); RootedValue value(cx); - uint32_t listLen = list->length(); + uint32_t listLen = listObj->length(); for (uint32_t i = 0; i < listLen; i++) { - if (!GetElement(cx, list, list, i, &value)) { + if (!GetElement(cx, listObj, listObj, i, &value)) { return false; } @@ -469,10 +335,6 @@ bool js::intl_FormatList(JSContext* cx, unsigned argc, Value* vp) { } size_t linearLength = linear->length(); - if (!stringLengths.append(linearLength)) { - return false; - } - stringLengthTotal += linearLength; UniqueTwoByteChars chars = cx->make_pod_array(linearLength); if (!chars) { @@ -483,21 +345,14 @@ bool js::intl_FormatList(JSContext* cx, unsigned argc, Value* vp) { if (!strings.append(std::move(chars))) { return false; } + + if (!list.emplaceBack(strings[i].get(), linearLength)) { + return false; + } } - // Add space for N unrealistically large conjunctions. - constexpr int32_t MaxConjunctionLen = 100; - stringLengthTotal += CheckedInt(listLen) * MaxConjunctionLen; - - // If the overestimate exceeds ICU length limits, don't try to format. 
- if (!stringLengthTotal.isValid()) { - ReportAllocationOverflow(cx); - return false; - } - - // Use the UListFormatter to actually format the strings. if (formatToParts) { - return FormatListToParts(cx, lf, strings, stringLengths, args.rval()); + return FormatListToParts(cx, lf, list, args.rval()); } - return FormatList(cx, lf, strings, stringLengths, args.rval()); + return FormatList(cx, lf, list, args.rval()); } diff --git a/js/src/builtin/intl/ListFormat.h b/js/src/builtin/intl/ListFormat.h index 155606473402..967baf5b8620 100644 --- a/js/src/builtin/intl/ListFormat.h +++ b/js/src/builtin/intl/ListFormat.h @@ -15,7 +15,10 @@ #include "vm/NativeObject.h" class JSFreeOp; -struct UListFormatter; + +namespace mozilla::intl { +class ListFormat; +} // namespace mozilla::intl namespace js { @@ -25,7 +28,7 @@ class ListFormatObject : public NativeObject { static const JSClass& protoClass_; static constexpr uint32_t INTERNALS_SLOT = 0; - static constexpr uint32_t ULIST_FORMATTER_SLOT = 1; + static constexpr uint32_t LIST_FORMAT_SLOT = 1; static constexpr uint32_t SLOT_COUNT = 2; static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, @@ -35,16 +38,16 @@ class ListFormatObject : public NativeObject { // Estimated memory use for UListFormatter (see IcuMemoryUsage). static constexpr size_t EstimatedMemoryUse = 24; - UListFormatter* getListFormatter() const { - const auto& slot = getFixedSlot(ULIST_FORMATTER_SLOT); + mozilla::intl::ListFormat* getListFormatSlot() const { + const auto& slot = getFixedSlot(LIST_FORMAT_SLOT); if (slot.isUndefined()) { return nullptr; } - return static_cast(slot.toPrivate()); + return static_cast(slot.toPrivate()); } - void setListFormatter(UListFormatter* formatter) { - setFixedSlot(ULIST_FORMATTER_SLOT, PrivateValue(formatter)); + void setListFormatSlot(mozilla::intl::ListFormat* format) { + setFixedSlot(LIST_FORMAT_SLOT, PrivateValue(format)); } private: