Bug 1719550 - Build an initial unified mozilla::intl::Collator; r=platform-i18n-reviewers,nordzilla

This collator attempts to match the options bag from the Intl.Collator API from ECMA-402. It is built to be compatible and consistent across both Gecko code and SpiderMonkey code. Its behavior is designed to match ECMA-402. Differential Revision: https://phabricator.services.mozilla.com/D120494
2021-08-10 16:01:15 +00:00 · 2021-08-10 16:01:15 +00:00 · c2cf97af11
--- a/intl/components/gtest/TestCollator.cpp
+++ b/intl/components/gtest/TestCollator.cpp
@ -0,0 +1,196 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "gtest/gtest.h"
+
+#include <string.h>
+#include "mozilla/intl/Collator.h"
+#include "mozilla/Span.h"
+#include "TestBuffer.h"
+
+namespace mozilla::intl {
+
+TEST(IntlCollator, SetAttributesInternal)
+{
+  // Feed every value of each attribute enum through its setter to make sure
+  // MOZ_ASSERT is not triggered for a misconfigured attribute.
+  auto created = Collator::TryCreate("en-US");
+  ASSERT_TRUE(created.isOk());
+  auto collator = created.unwrap();
+
+  const Collator::Strength strengths[] = {
+      Collator::Strength::Primary,   Collator::Strength::Secondary,
+      Collator::Strength::Tertiary,  Collator::Strength::Quaternary,
+      Collator::Strength::Identical, Collator::Strength::Default,
+  };
+  for (Collator::Strength strength : strengths) {
+    collator->SetStrength(strength);
+  }
+
+  const Collator::AlternateHandling handlings[] = {
+      Collator::AlternateHandling::NonIgnorable,
+      Collator::AlternateHandling::Shifted,
+      Collator::AlternateHandling::Default,
+  };
+  for (Collator::AlternateHandling handling : handlings) {
+    collator->SetAlternateHandling(handling).unwrap();
+  }
+
+  const Collator::CaseFirst caseFirsts[] = {
+      Collator::CaseFirst::False,
+      Collator::CaseFirst::Upper,
+      Collator::CaseFirst::Lower,
+  };
+  for (Collator::CaseFirst caseFirst : caseFirsts) {
+    collator->SetCaseFirst(caseFirst).unwrap();
+  }
+
+  // The three toggles below all accept the same Feature values.
+  const Collator::Feature features[] = {
+      Collator::Feature::On,
+      Collator::Feature::Off,
+      Collator::Feature::Default,
+  };
+  for (Collator::Feature feature : features) {
+    collator->SetCaseLevel(feature).unwrap();
+  }
+  for (Collator::Feature feature : features) {
+    collator->SetNumericCollation(feature).unwrap();
+  }
+  for (Collator::Feature feature : features) {
+    collator->SetNormalizationMode(feature).unwrap();
+  }
+}
+
+TEST(IntlCollator, GetSortKey)
+{
+  // Light sanity check that sort keys can be generated and that they order as
+  // expected. This is not an extensive correctness test.
+  auto created = Collator::TryCreate("en-US");
+  ASSERT_TRUE(created.isOk());
+  auto collator = created.unwrap();
+  TestBuffer<uint8_t> bufferA;
+  TestBuffer<uint8_t> bufferB;
+
+  // Writes the sort key of each string into its own buffer, then orders the
+  // two keys with strcmp().
+  auto orderOfSortKeys = [&](const char16_t* a, const char16_t* b) {
+    collator->GetSortKey(MakeStringSpan(a), bufferA).unwrap();
+    collator->GetSortKey(MakeStringSpan(b), bufferB).unwrap();
+    const char* keyA = reinterpret_cast<const char*>(bufferA.data());
+    const char* keyB = reinterpret_cast<const char*>(bufferB.data());
+    return strcmp(keyA, keyB);
+  };
+
+  ASSERT_TRUE(orderOfSortKeys(u"aaa", u"bbb") < 0);
+  ASSERT_TRUE(orderOfSortKeys(u"bbb", u"aaa") > 0);
+  ASSERT_TRUE(orderOfSortKeys(u"aaa", u"aaa") == 0);
+  ASSERT_TRUE(orderOfSortKeys(u"👍", u"👎") < 0);
+}
+
+TEST(IntlCollator, CompareStrings)
+{
+  // Do some light string comparisons to ensure everything is wired up
+  // correctly. This is not doing extensive correctness testing.
+  auto result = Collator::TryCreate("en-US");
+  ASSERT_TRUE(result.isOk());
+  auto collator = result.unwrap();
+
+  // CompareStrings() reports the ordering as -1, 0 or 1.
+  ASSERT_EQ(collator->CompareStrings(u"aaa", u"bbb"), -1);
+  ASSERT_EQ(collator->CompareStrings(u"bbb", u"aaa"), 1);
+  ASSERT_EQ(collator->CompareStrings(u"aaa", u"aaa"), 0);
+  ASSERT_EQ(collator->CompareStrings(u"👍", u"👎"), -1);
+}
+
+TEST(IntlCollator, SetOptionsSensitivity)
+{
+  // Test the ECMA 402 sensitivity behavior per:
+  // https://tc39.es/ecma402/#sec-collator-comparestrings
+  auto result = Collator::TryCreate("en-US");
+  ASSERT_TRUE(result.isOk());
+  auto collator = result.unwrap();
+
+  Collator::Options options{};
+
+  // Base: only base letters matter (a ≠ b, a = á, a = A).
+  options.sensitivity = Collator::Sensitivity::Base;
+  ASSERT_TRUE(collator->SetOptions(options).isOk());
+  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
+  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0);
+  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0);
+
+  // Accent: base letters and accents matter (a ≠ b, a ≠ á, a = A).
+  options.sensitivity = Collator::Sensitivity::Accent;
+  ASSERT_TRUE(collator->SetOptions(options).isOk());
+  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
+  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1);
+  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0);
+
+  // Case: base letters and case matter (a ≠ b, a = á, a ≠ A).
+  options.sensitivity = Collator::Sensitivity::Case;
+  ASSERT_TRUE(collator->SetOptions(options).isOk());
+  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
+  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0);
+  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1);
+
+  // Variant: base letters, accents and case all matter (a ≠ b, a ≠ á, a ≠ A).
+  options.sensitivity = Collator::Sensitivity::Variant;
+  ASSERT_TRUE(collator->SetOptions(options).isOk());
+  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
+  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1);
+  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1);
+}
+
+TEST(IntlCollator, LocaleSensitiveCollations)
+{
+  UniquePtr<Collator> collator = nullptr;
+
+  // Replaces `collator` with a base-sensitivity collator for `locale`.
+  //
+  // A fatal assertion inside this lambda only returns from the lambda, not
+  // from the test, so every call is wrapped in ASSERT_NO_FATAL_FAILURE to stop
+  // the test before a null or stale `collator` is used.
+  auto changeLocale = [&](const char* locale) {
+    auto result = Collator::TryCreate(locale);
+    ASSERT_TRUE(result.isOk());
+    collator = result.unwrap();
+
+    Collator::Options options{};
+    options.sensitivity = Collator::Sensitivity::Base;
+    auto optResult = collator->SetOptions(options);
+    ASSERT_TRUE(optResult.isOk());
+  };
+
+  // Swedish treats "Ö" as a separate character, which sorts after "Z".
+  ASSERT_NO_FATAL_FAILURE(changeLocale("en-US"));
+  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), -1);
+  ASSERT_NO_FATAL_FAILURE(changeLocale("sv-SE"));
+  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), 1);
+
+  // Country names in their respective scripts.
+  auto china = MakeStringSpan(u"中国");
+  auto japan = MakeStringSpan(u"日本");
+  auto korea = MakeStringSpan(u"한국");
+
+  ASSERT_NO_FATAL_FAILURE(changeLocale("en-US"));
+  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
+  ASSERT_EQ(collator->CompareStrings(china, korea), 1);
+  ASSERT_NO_FATAL_FAILURE(changeLocale("zh"));
+  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
+  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
+  ASSERT_NO_FATAL_FAILURE(changeLocale("ja"));
+  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
+  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
+  ASSERT_NO_FATAL_FAILURE(changeLocale("ko"));
+  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
+  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
+}
+
+TEST(IntlCollator, IgnorePunctuation)
+{
+  auto result = Collator::TryCreate("en-US");
+  ASSERT_TRUE(result.isOk());
+  auto collator = result.unwrap();
+  Collator::Options options{};
+
+  // With punctuation ignored, the leading "." does not take part in the
+  // comparison, so this is "aa" against "bb".
+  options.ignorePunctuation = true;
+  auto optResult = collator->SetOptions(options);
+  ASSERT_TRUE(optResult.isOk());
+
+  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), -1);
+
+  // With punctuation significant, ".bb" is expected to sort before "aa".
+  options.ignorePunctuation = false;
+  optResult = collator->SetOptions(options);
+  ASSERT_TRUE(optResult.isOk());
+
+  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), 1);
+}
+
+}  // namespace mozilla::intl
--- a/intl/components/gtest/moz.build
+++ b/intl/components/gtest/moz.build
@ -6,6 +6,7 @@

 UNIFIED_SOURCES += [
    "TestCalendar.cpp",
+    "TestCollator.cpp",
    "TestDateTimeFormat.cpp",
    "TestNumberFormat.cpp",
    "TestPluralRules.cpp",
--- a/intl/components/moz.build
+++ b/intl/components/moz.build
@ -5,6 +5,7 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 EXPORTS.mozilla.intl = [
    "src/Calendar.h",
+    "src/Collator.h",
    "src/DateTimeFormat.h",
    "src/DateTimePatternGenerator.h",
    "src/ICU4CGlue.h",
@ -14,8 +15,10 @@ EXPORTS.mozilla.intl = [

 UNIFIED_SOURCES += [
    "src/Calendar.cpp",
+    "src/Collator.cpp",
    "src/DateTimeFormat.cpp",
    "src/DateTimePatternGenerator.cpp",
+    "src/ICU4CGlue.cpp",
    "src/NumberFormat.cpp",
    "src/NumberFormatFields.cpp",
    "src/NumberFormatterSkeleton.cpp",
--- a/intl/components/src/Collator.cpp
+++ b/intl/components/src/Collator.cpp
@ -0,0 +1,253 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <string.h>
+#include "mozilla/intl/Collator.h"
+
+namespace mozilla::intl {
+
+// Wraps an already-opened ICU collator handle. The handle must be non-null
+// (checked with MOZ_ASSERT); the destructor closes it with ucol_close().
+Collator::Collator(UCollator* aCollator) : mCollator(aCollator) {
+  MOZ_ASSERT(aCollator);
+}
+
+// Closes the wrapped ICU collator, if one is held.
+Collator::~Collator() {
+  UCollator* const collator = mCollator.GetMut();
+  if (collator != nullptr) {
+    ucol_close(collator);
+  }
+}
+
+// Opens an ICU collator for aLocale and wraps it in a Collator.
+//
+// Returns ICUError::OutOfMemory when ICU reports U_MEMORY_ALLOCATION_ERROR and
+// ICUError::InternalError for any other ICU failure.
+Result<UniquePtr<Collator>, ICUError> Collator::TryCreate(const char* aLocale) {
+  UErrorCode status = U_ZERO_ERROR;
+  UCollator* collator = ucol_open(aLocale, &status);
+  if (U_FAILURE(status)) {
+    if (status == U_MEMORY_ALLOCATION_ERROR) {
+      return Err(ICUError::OutOfMemory);
+    }
+    return Err(ICUError::InternalError);
+  }
+  return MakeUnique<Collator>(collator);
+}
+
+// Collates aSource against aTarget with ucol_strcoll() and reports the
+// ordering as -1 (less), 0 (equal) or 1 (greater).
+int32_t Collator::CompareStrings(Span<const char16_t> aSource,
+                                 Span<const char16_t> aTarget) const {
+  const auto sourceLength = static_cast<int32_t>(aSource.size());
+  const auto targetLength = static_cast<int32_t>(aTarget.size());
+  const auto order = ucol_strcoll(mCollator.GetConst(), aSource.data(),
+                                  sourceLength, aTarget.data(), targetLength);
+  switch (order) {
+    case UCOL_LESS:
+      return -1;
+    case UCOL_EQUAL:
+      return 0;
+    case UCOL_GREATER:
+      return 1;
+  }
+  MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult");
+  return 0;
+}
+
+// Orders two sort keys: the bytes they have in common are compared with
+// strncmp(), and on a tie the shorter key sorts first. Returns -1, 0 or 1.
+int32_t Collator::CompareSortKeys(Span<const uint8_t> aKey1,
+                                  Span<const uint8_t> aKey2) const {
+  const auto length1 = aKey1.Length();
+  const auto length2 = aKey2.Length();
+  const auto sharedLength = std::min(length1, length2);
+
+  const int prefixOrder =
+      strncmp(reinterpret_cast<const char*>(aKey1.Elements()),
+              reinterpret_cast<const char*>(aKey2.Elements()), sharedLength);
+  if (prefixOrder != 0) {
+    return prefixOrder < 0 ? -1 : 1;
+  }
+
+  // The common prefix is identical, so the lengths decide: a key that extends
+  // the other one comes later.
+  if (length1 == length2) {
+    return 0;
+  }
+  return length1 > length2 ? 1 : -1;
+}
+
+// Translates a Collator::CaseFirst into the value ICU expects for the
+// UCOL_CASE_FIRST attribute.
+static UColAttributeValue CaseFirstToICU(Collator::CaseFirst caseFirst) {
+  switch (caseFirst) {
+    case Collator::CaseFirst::Upper:
+      return UCOL_UPPER_FIRST;
+    case Collator::CaseFirst::Lower:
+      return UCOL_LOWER_FIRST;
+    case Collator::CaseFirst::False:
+      return UCOL_OFF;
+  }
+
+  // Only reachable if caseFirst holds a value outside the enum.
+  MOZ_ASSERT_UNREACHABLE();
+  return UCOL_DEFAULT;
+}
+
+// Stores in `featureICU` the UColAttributeValue matching the Collator::Feature
+// `feature`. This is a macro to work around exposing the UColAttributeValue
+// type to the header file; Collator::Feature is private to the class.
+//
+// The body is wrapped in do { } while (0) so that an invocation followed by a
+// semicolon is exactly one statement, including inside an unbraced if/else.
+#define FEATURE_TO_ICU(featureICU, feature) \
+  do {                                      \
+    switch (feature) {                      \
+      case Collator::Feature::On:           \
+        (featureICU) = UCOL_ON;             \
+        break;                              \
+      case Collator::Feature::Off:          \
+        (featureICU) = UCOL_OFF;            \
+        break;                              \
+      case Collator::Feature::Default:      \
+        (featureICU) = UCOL_DEFAULT;        \
+        break;                              \
+    }                                       \
+  } while (0)
+
+// Maps aStrength to the corresponding ICU strength and applies it with
+// ucol_setStrength().
+void Collator::SetStrength(Collator::Strength aStrength) {
+  // Initialized up front so that an out-of-range enum value can never pass an
+  // indeterminate value to ICU. The switch deliberately has no default label
+  // so the compiler can still flag a missing enumerator.
+  UColAttributeValue strength = UCOL_DEFAULT_STRENGTH;
+  switch (aStrength) {
+    case Collator::Strength::Default:
+      strength = UCOL_DEFAULT_STRENGTH;
+      break;
+    case Collator::Strength::Primary:
+      strength = UCOL_PRIMARY;
+      break;
+    case Collator::Strength::Secondary:
+      strength = UCOL_SECONDARY;
+      break;
+    case Collator::Strength::Tertiary:
+      strength = UCOL_TERTIARY;
+      break;
+    case Collator::Strength::Quaternary:
+      strength = UCOL_QUATERNARY;
+      break;
+    case Collator::Strength::Identical:
+      strength = UCOL_IDENTICAL;
+      break;
+  }
+
+  ucol_setStrength(mCollator.GetMut(), strength);
+}
+
+// Sets ICU's UCOL_CASE_LEVEL attribute from aFeature and reports the ICU
+// status as an ICUResult.
+ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Initialized so an out-of-range enum value cannot leave this indeterminate.
+  UColAttributeValue featureICU = UCOL_DEFAULT;
+  FEATURE_TO_ICU(featureICU, aFeature);
+  ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL, featureICU, &status);
+  return ToICUResult(status);
+}
+
+// Sets ICU's UCOL_ALTERNATE_HANDLING attribute from aAlternateHandling and
+// reports the ICU status as an ICUResult.
+ICUResult Collator::SetAlternateHandling(
+    Collator::AlternateHandling aAlternateHandling) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Initialized so an out-of-range enum value cannot leave this indeterminate.
+  // The switch has no default label so a missing enumerator is still flagged.
+  UColAttributeValue handling = UCOL_DEFAULT;
+  switch (aAlternateHandling) {
+    case Collator::AlternateHandling::NonIgnorable:
+      handling = UCOL_NON_IGNORABLE;
+      break;
+    case Collator::AlternateHandling::Shifted:
+      handling = UCOL_SHIFTED;
+      break;
+    case Collator::AlternateHandling::Default:
+      handling = UCOL_DEFAULT;
+      break;
+  }
+
+  ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling,
+                    &status);
+  return ToICUResult(status);
+}
+
+// Sets ICU's UCOL_NUMERIC_COLLATION attribute from aFeature and reports the
+// ICU status as an ICUResult.
+ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Initialized so an out-of-range enum value cannot leave this indeterminate.
+  UColAttributeValue featureICU = UCOL_DEFAULT;
+  FEATURE_TO_ICU(featureICU, aFeature);
+
+  ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION, featureICU,
+                    &status);
+  return ToICUResult(status);
+}
+
+// Sets ICU's UCOL_NORMALIZATION_MODE attribute from aFeature and reports the
+// ICU status as an ICUResult.
+ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Initialized so an out-of-range enum value cannot leave this indeterminate.
+  UColAttributeValue featureICU = UCOL_DEFAULT;
+  FEATURE_TO_ICU(featureICU, aFeature);
+  ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE, featureICU,
+                    &status);
+  return ToICUResult(status);
+}
+
+// Sets ICU's UCOL_CASE_FIRST attribute from aCaseFirst and reports the ICU
+// status as an ICUResult.
+ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) {
+  const UColAttributeValue caseFirstICU = CaseFirstToICU(aCaseFirst);
+  UErrorCode status = U_ZERO_ERROR;
+  ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST, caseFirstICU,
+                    &status);
+  return ToICUResult(status);
+}
+
+// Applies the ECMA 402 style options in aOptions to the ICU collator.
+//
+// When aPrevOptions is supplied and matches aOptions field for field, nothing
+// is reconfigured and Ok() is returned immediately. Otherwise each ICU
+// attribute is set in turn and the first failure is returned.
+ICUResult Collator::SetOptions(const Options& aOptions,
+                               const Maybe<Options&> aPrevOptions) {
+  const bool unchanged =
+      aPrevOptions && aPrevOptions->sensitivity == aOptions.sensitivity &&
+      aPrevOptions->caseFirst == aOptions.caseFirst &&
+      aPrevOptions->ignorePunctuation == aOptions.ignorePunctuation &&
+      aPrevOptions->numeric == aOptions.numeric;
+  if (unchanged) {
+    return Ok();
+  }
+
+  // Translate the sensitivity into an ICU strength plus the case level toggle.
+  Collator::Feature caseLevel = Collator::Feature::Off;
+  Collator::Strength strength = Collator::Strength::Default;
+  switch (aOptions.sensitivity) {
+    case Collator::Sensitivity::Base:
+      strength = Collator::Strength::Primary;
+      break;
+    case Collator::Sensitivity::Case:
+      strength = Collator::Strength::Primary;
+      caseLevel = Collator::Feature::On;
+      break;
+    case Collator::Sensitivity::Accent:
+      strength = Collator::Strength::Secondary;
+      break;
+    case Collator::Sensitivity::Variant:
+      strength = Collator::Strength::Tertiary;
+      break;
+  }
+
+  SetStrength(strength);
+
+  // According to the ICU team, UCOL_SHIFTED causes punctuation to be
+  // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data
+  // Markup Language, "shifted" causes whitespace and punctuation to be
+  // ignored - that's a bit more than asked for, but there's no way to get
+  // less.
+  const Collator::AlternateHandling handling =
+      aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted
+                                 : Collator::AlternateHandling::Default;
+  if (ICUResult outcome = SetAlternateHandling(handling); outcome.isErr()) {
+    return outcome;
+  }
+
+  if (ICUResult outcome = SetCaseLevel(caseLevel); outcome.isErr()) {
+    return outcome;
+  }
+
+  const Collator::Feature numeric =
+      aOptions.numeric ? Collator::Feature::On : Collator::Feature::Off;
+  if (ICUResult outcome = SetNumericCollation(numeric); outcome.isErr()) {
+    return outcome;
+  }
+
+  // Normalization is always on to meet the canonical equivalence requirement.
+  if (ICUResult outcome = SetNormalizationMode(Collator::Feature::On);
+      outcome.isErr()) {
+    return outcome;
+  }
+
+  // Last step, so its result is the result of the whole call.
+  return SetCaseFirst(aOptions.caseFirst);
+}
+
+#undef FEATURE_TO_ICU
+
+}  // namespace mozilla::intl
--- a/intl/components/src/Collator.h
+++ b/intl/components/src/Collator.h
@ -0,0 +1,255 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef intl_components_Collator_h_
+#define intl_components_Collator_h_
+
+#ifndef JS_STANDALONE
+#  include "gtest/MozGtestFriend.h"
+#endif
+
+#include "unicode/ucol.h"
+
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+
+namespace mozilla::intl {
+
+/**
+ * A wrapper around an ICU UCollator whose public options follow the ECMA 402
+ * Intl.Collator options bag. Instances are created with TryCreate() and own
+ * the underlying ICU collator, which is closed in the destructor.
+ */
+class Collator final {
+ public:
+  /**
+   * Construct from a raw UCollator. This is public so that the UniquePtr can
+   * access it.
+   */
+  explicit Collator(UCollator* aCollator);
+
+  // Do not allow copy as this class owns the ICU resource. Move is not
+  // currently implemented, but a custom move operator could be created if
+  // needed.
+  Collator(const Collator&) = delete;
+  Collator& operator=(const Collator&) = delete;
+
+  /**
+   * Attempt to initialize a new collator.
+   */
+  static Result<UniquePtr<Collator>, ICUError> TryCreate(const char* aLocale);
+
+  ~Collator();
+
+  /**
+   * Write the sort key for aString into aBuffer.
+   *
+   * B must expose a `CharType` of uint8_t together with reserve(), data() and
+   * written(). Returns ICUError::InternalError when ICU reports a key length
+   * of 0, and ICUError::OutOfMemory when aBuffer cannot reserve enough space.
+   */
+  template <typename B>
+  ICUResult GetSortKey(Span<const char16_t> aString, B& aBuffer) const {
+    static_assert(std::is_same_v<typename B::CharType, uint8_t>,
+                  "Expected a uint8_t* buffer.");
+    // Do not use FillBufferWithICUCall, as this API does not report the
+    // U_BUFFER_OVERFLOW_ERROR. ucol_getSortKey takes no UErrorCode at all;
+    // the return value is always the number of bytes needed, regardless of
+    // whether the result buffer was big enough.
+    const auto sourceLength = static_cast<int32_t>(aString.size());
+
+    // First pass: no result buffer, just ask how many bytes are needed.
+    int32_t length = ucol_getSortKey(mCollator.GetConst(), aString.data(),
+                                     sourceLength, nullptr, 0);
+    if (length == 0) {
+      // If the length is 0, an internal error occurred according to the docs.
+      return Err(ICUError::InternalError);
+    }
+
+    if (!aBuffer.reserve(length)) {
+      return Err(ICUError::OutOfMemory);
+    }
+
+    // Second pass: write the key into the reserved storage.
+    length = ucol_getSortKey(mCollator.GetConst(), aString.data(),
+                             sourceLength, aBuffer.data(), length);
+    if (length == 0) {
+      return Err(ICUError::InternalError);
+    }
+
+    aBuffer.written(length);
+    return Ok();
+  }
+
+  /**
+   * Compare two strings with the collator. Returns -1 when aSource sorts
+   * before aTarget, 0 when they compare as equal and 1 when it sorts after.
+   */
+  int32_t CompareStrings(Span<const char16_t> aSource,
+                         Span<const char16_t> aTarget) const;
+
+  /**
+   * Compare two sort keys byte-wise. Returns -1, 0 or 1 with the same meaning
+   * as CompareStrings.
+   */
+  int32_t CompareSortKeys(Span<const uint8_t> aKey1,
+                          Span<const uint8_t> aKey2) const;
+
+  /**
+   * Determine how casing affects sorting. These options map to ECMA 402
+   * collator options.
+   *
+   * https://tc39.es/ecma402/#sec-initializecollator
+   */
+  enum class CaseFirst {
+    // Sort upper case first.
+    Upper,
+    // Sort lower case first.
+    Lower,
+    // Orders upper and lower case letters in accordance to their tertiary
+    // weights.
+    False,
+  };
+
+  /**
+   * Which differences in the strings should lead to differences in collation
+   * comparisons.
+   *
+   * This setting needs to be ECMA 402 compliant.
+   * https://tc39.es/ecma402/#sec-collator-comparestrings
+   */
+  enum class Sensitivity {
+    // Only strings that differ in base letters compare as unequal.
+    // Examples: a ≠ b, a = á, a = A.
+    Base,
+    // Only strings that differ in base letters or accents and other diacritic
+    // marks compare as unequal.
+    // Examples: a ≠ b, a ≠ á, a = A.
+    Accent,
+    // Only strings that differ in base letters or case compare as unequal.
+    // Examples: a ≠ b, a = á, a ≠ A.
+    Case,
+    // Strings that differ in base letters, accents and other diacritic marks,
+    // or case compare as unequal. Other differences may also be taken into
+    // consideration.
+    // Examples: a ≠ b, a ≠ á, a ≠ A.
+    Variant,
+  };
+
+  /**
+   * These options map to ECMA 402 collator options. Make sure the defaults map
+   * to the default initialized values of ECMA 402.
+   *
+   * https://tc39.es/ecma402/#sec-initializecollator
+   */
+  struct Options {
+    Sensitivity sensitivity = Sensitivity::Variant;
+    CaseFirst caseFirst = CaseFirst::False;
+    bool ignorePunctuation = false;
+    bool numeric = false;
+  };
+
+  /**
+   * Change the configuration of the options.
+   *
+   * If aPrevOptions is provided and equal to aOptions, the collator is left
+   * untouched.
+   */
+  ICUResult SetOptions(const Options& aOptions,
+                       const Maybe<Options&> aPrevOptions = Nothing());
+
+ private:
+  /**
+   * Toggle features, or use the default setting.
+   */
+  enum class Feature {
+    // Turn the feature on.
+    On,
+    // Turn the feature off.
+    Off,
+    // Use the default setting for the feature.
+    Default,
+  };
+
+  /**
+   * Attribute for handling variable elements.
+   */
+  enum class AlternateHandling {
+    // Treats all the codepoints with non-ignorable primary weights in the
+    // same way (default)
+    NonIgnorable,
+    // Causes codepoints with primary weights that are equal or below the
+    // variable top value to be ignored on primary level and moved to the
+    // quaternary level.
+    Shifted,
+    Default,
+  };
+
+  /**
+   * The strength attribute.
+   *
+   * The usual strength for most locales (except Japanese) is tertiary.
+   *
+   * Quaternary strength is useful when combined with shifted setting for
+   * alternate handling attribute and for JIS X 4061 collation, when it is used
+   * to distinguish between Katakana and Hiragana. Otherwise, quaternary level
+   * is affected only by the number of non-ignorable code points in the string.
+   *
+   * Identical strength is rarely useful, as it amounts to codepoints of the NFD
+   * form of the string.
+   */
+  enum class Strength {
+    // Primary collation strength.
+    Primary,
+    // Secondary collation strength.
+    Secondary,
+    // Tertiary collation strength.
+    Tertiary,
+    // Quaternary collation strength.
+    Quaternary,
+    // Identical collation strength.
+    Identical,
+    Default,
+  };
+
+  /**
+   * Configure the Collator::Strength
+   */
+  void SetStrength(Strength strength);
+
+  /**
+   * Configure Collator::AlternateHandling.
+   */
+  ICUResult SetAlternateHandling(AlternateHandling aAlternateHandling);
+
+  /**
+   * Controls whether an extra case level (positioned before the third level) is
+   * generated or not.
+   *
+   * Contents of the case level are affected by the value of CaseFirst
+   * attribute. A simple way to ignore accent differences in a string is to set
+   * the strength to Primary and enable case level.
+   */
+  ICUResult SetCaseLevel(Feature aFeature);
+
+  /**
+   * When turned on, this attribute makes substrings of digits sort according to
+   * their numeric values.
+   *
+   * This is a way to get '100' to sort AFTER '2'. Note that the longest digit
+   * substring that can be treated as a single unit is 254 digits (not counting
+   * leading zeros). If a digit substring is longer than that, the digits beyond
+   * the limit will be treated as a separate digit substring.
+   *
+   * A "digit" in this sense is a code point with General_Category=Nd, which
+   * does not include circled numbers, roman numerals, etc. Only a contiguous
+   * digit substring is considered, that is, non-negative integers without
+   * separators. There is no support for plus/minus signs, decimals, exponents,
+   * etc.
+   */
+  ICUResult SetNumericCollation(Feature aFeature);
+
+  /**
+   * Controls whether the normalization check and necessary normalizations are
+   * performed.
+   *
+   * When off (default), no normalization check is performed. The correctness of
+   * the result is guaranteed only if the input data is in so-called FCD form.
+   * When set to on, an incremental check is performed to see whether the input
+   * data is in the FCD form. If the data is not in the FCD form, incremental
+   * NFD normalization is performed.
+   */
+  ICUResult SetNormalizationMode(Feature aFeature);
+
+  /**
+   * Configure Collator::CaseFirst.
+   */
+  ICUResult SetCaseFirst(CaseFirst aCaseFirst);
+
+#ifndef JS_STANDALONE
+  FRIEND_TEST(IntlCollator, SetAttributesInternal);
+#endif
+
+  // The ICU collator owned by this object.
+  ICUPointer<UCollator> mCollator = ICUPointer<UCollator>(nullptr);
+  // NOTE(review): not read or written by any code visible in this patch;
+  // confirm whether it is still needed.
+  Maybe<Sensitivity> mLastStrategy = Nothing();
+};
+
+}  // namespace mozilla::intl
+
+#endif
--- a/intl/components/src/ICU4CGlue.cpp
+++ b/intl/components/src/ICU4CGlue.cpp
@ -0,0 +1,19 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/ICU4CGlue.h"
+
+namespace mozilla::intl {
+
+// Maps an ICU status code onto an ICUResult: Ok() when U_SUCCESS(status)
+// holds, ICUError::OutOfMemory for U_MEMORY_ALLOCATION_ERROR and
+// ICUError::InternalError for every other failure.
+ICUResult ToICUResult(UErrorCode status) {
+  if (U_FAILURE(status)) {
+    if (status == U_MEMORY_ALLOCATION_ERROR) {
+      return Err(ICUError::OutOfMemory);
+    }
+    return Err(ICUError::InternalError);
+  }
+  return Ok();
+}
+
+}  // namespace mozilla::intl
--- a/intl/components/src/ICU4CGlue.h
+++ b/intl/components/src/ICU4CGlue.h
@ -27,6 +27,11 @@ struct InternalError {};

 using ICUResult = Result<Ok, ICUError>;

+/**
+ * Convert a UErrorCode to ICUResult.
+ *
+ * A status for which U_SUCCESS() holds becomes Ok(). U_MEMORY_ALLOCATION_ERROR
+ * becomes ICUError::OutOfMemory, and any other failure becomes
+ * ICUError::InternalError.
+ */
+ICUResult ToICUResult(UErrorCode status);
+
 /**
 * The ICU status can complain about a string not being terminated, but this
 * is fine for this API, as it deals with the mozilla::Span that has a pointer
@ -36,6 +41,28 @@ static inline bool ICUSuccessForStringSpan(UErrorCode status) {
  return U_SUCCESS(status) || status == U_STRING_NOT_TERMINATED_WARNING;
 }

+/**
+ * This class manages the access to an ICU pointer. It allows requesting either
+ * a mutable or const pointer. This pointer should match the const or mutability
+ * of the ICU APIs. This will then correctly propagate const-ness into the
+ * mozilla::intl APIs.
+ *
+ * The class does not free the pointer itself; it only hands it out. Moving
+ * from an ICUPointer resets the source to nullptr, so that two objects never
+ * refer to the same ICU resource after a move.
+ */
+template <typename T>
+class ICUPointer {
+ public:
+  explicit ICUPointer(T* aPointer) : mPointer(aPointer) {}
+
+  // Only allow moves, no copies. Declaring the move operations suppresses the
+  // implicit copy operations.
+  ICUPointer(ICUPointer&& other) noexcept : mPointer(other.mPointer) {
+    other.mPointer = nullptr;
+  }
+  ICUPointer& operator=(ICUPointer&& other) noexcept {
+    if (this != &other) {
+      mPointer = other.mPointer;
+      other.mPointer = nullptr;
+    }
+    return *this;
+  }
+
+  // Read-only view of the pointer, for ICU APIs taking a const pointer.
+  const T* GetConst() const { return mPointer; }
+  // Mutable view of the pointer, for ICU APIs that modify the object.
+  T* GetMut() { return mPointer; }
+
+ private:
+  T* mPointer;
+};
+
 /**
 * Calling into ICU with the C-API can be a bit tricky. This function wraps up
 * the relatively risky operations involving pointers, lengths, and buffers into