Bug 1719550 - Build an initial unified mozilla::intl::Collator; r=platform-i18n-reviewers,nordzilla

This collator attempts to match the options bag from the Intl.Collator API
from ECMA-402. It is built to be compatible and consistent across both Gecko
code and SpiderMonkey code. Its behavior is designed to match ECMA-402.

Differential Revision: https://phabricator.services.mozilla.com/D120494
This commit is contained in:
Greg Tatum 2021-08-10 16:01:15 +00:00
Родитель f41c9baf2b
Коммит c2cf97af11
7 изменённых файлов: 754 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,196 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include <string.h>
#include "mozilla/intl/Collator.h"
#include "mozilla/Span.h"
#include "TestBuffer.h"
namespace mozilla::intl {
TEST(IntlCollator, SetAttributesInternal)
{
  // Walk through every value of each internal attribute setter to make sure
  // MOZ_ASSERT is not triggered for misconfigured attributes.
  auto created = Collator::TryCreate("en-US");
  ASSERT_TRUE(created.isOk());
  auto collator = created.unwrap();

  for (auto strength :
       {Collator::Strength::Primary, Collator::Strength::Secondary,
        Collator::Strength::Tertiary, Collator::Strength::Quaternary,
        Collator::Strength::Identical, Collator::Strength::Default}) {
    collator->SetStrength(strength);
  }

  for (auto handling : {Collator::AlternateHandling::NonIgnorable,
                        Collator::AlternateHandling::Shifted,
                        Collator::AlternateHandling::Default}) {
    collator->SetAlternateHandling(handling).unwrap();
  }

  for (auto caseFirst : {Collator::CaseFirst::False, Collator::CaseFirst::Upper,
                         Collator::CaseFirst::Lower}) {
    collator->SetCaseFirst(caseFirst).unwrap();
  }

  // The three on/off style attributes are each cycled through On, Off and
  // Default, one attribute at a time.
  for (auto feature : {Collator::Feature::On, Collator::Feature::Off,
                       Collator::Feature::Default}) {
    collator->SetCaseLevel(feature).unwrap();
  }
  for (auto feature : {Collator::Feature::On, Collator::Feature::Off,
                       Collator::Feature::Default}) {
    collator->SetNumericCollation(feature).unwrap();
  }
  for (auto feature : {Collator::Feature::On, Collator::Feature::Off,
                       Collator::Feature::Default}) {
    collator->SetNormalizationMode(feature).unwrap();
  }
}
TEST(IntlCollator, GetSortKey)
{
  // Smoke test only: a handful of sort key comparisons to check that the
  // wiring is correct. This is not an extensive correctness test.
  auto created = Collator::TryCreate("en-US");
  ASSERT_TRUE(created.isOk());
  auto collator = created.unwrap();

  TestBuffer<uint8_t> keyA;
  TestBuffer<uint8_t> keyB;

  // Generates the sort key of each string into the shared buffers and orders
  // the two keys with strcmp.
  auto orderBySortKey = [&](const char16_t* lhs, const char16_t* rhs) {
    collator->GetSortKey(MakeStringSpan(lhs), keyA).unwrap();
    collator->GetSortKey(MakeStringSpan(rhs), keyB).unwrap();
    const char* rawA = reinterpret_cast<const char*>(keyA.data());
    const char* rawB = reinterpret_cast<const char*>(keyB.data());
    return strcmp(rawA, rawB);
  };

  ASSERT_TRUE(orderBySortKey(u"aaa", u"bbb") < 0);
  ASSERT_TRUE(orderBySortKey(u"bbb", u"aaa") > 0);
  ASSERT_TRUE(orderBySortKey(u"aaa", u"aaa") == 0);
  ASSERT_TRUE(orderBySortKey(u"👍", u"👎") < 0);
}
TEST(IntlCollator, CompareStrings)
{
  // Do some light string comparisons to ensure everything is wired up
  // correctly. This is not doing extensive correctness testing.
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  // CompareStrings reports the ordering as -1, 0 or 1.
  ASSERT_EQ(collator->CompareStrings(u"aaa", u"bbb"), -1);
  ASSERT_EQ(collator->CompareStrings(u"bbb", u"aaa"), 1);
  ASSERT_EQ(collator->CompareStrings(u"aaa", u"aaa"), 0);
  ASSERT_EQ(collator->CompareStrings(u"👍", u"👎"), -1);
}
TEST(IntlCollator, SetOptionsSensitivity)
{
  // Test the ECMA 402 sensitivity behavior per:
  // https://tc39.es/ecma402/#sec-collator-comparestrings
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  Collator::Options options{};

  // Base: only base letters matter (a ≠ b, a = á, a = A).
  options.sensitivity = Collator::Sensitivity::Base;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0);
  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0);

  // Accent: base letters and accents matter (a ≠ b, a ≠ á, a = A).
  options.sensitivity = Collator::Sensitivity::Accent;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0);

  // Case: base letters and case matter (a ≠ b, a = á, a ≠ A).
  options.sensitivity = Collator::Sensitivity::Case;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0);
  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1);

  // Variant: base letters, accents and case all matter (a ≠ b, a ≠ á, a ≠ A).
  options.sensitivity = Collator::Sensitivity::Variant;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1);
}
TEST(IntlCollator, LocaleSensitiveCollations)
{
  UniquePtr<Collator> collator = nullptr;

  // Replaces `collator` with a base-sensitivity collator for the given locale.
  // The ASSERT_* macros inside this lambda only return from the lambda, so
  // every call below is wrapped in ASSERT_NO_FATAL_FAILURE to stop the test
  // instead of continuing with a stale or null collator.
  auto changeLocale = [&](const char* locale) {
    auto result = Collator::TryCreate(locale);
    ASSERT_TRUE(result.isOk());
    collator = result.unwrap();
    Collator::Options options{};
    options.sensitivity = Collator::Sensitivity::Base;
    auto optResult = collator->SetOptions(options);
    ASSERT_TRUE(optResult.isOk());
  };

  // Swedish treats "Ö" as a separate character, which sorts after "Z".
  ASSERT_NO_FATAL_FAILURE(changeLocale("en-US"));
  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), -1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("sv-SE"));
  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), 1);

  // Country names in their respective scripts.
  auto china = MakeStringSpan(u"中国");
  auto japan = MakeStringSpan(u"日本");
  auto korea = MakeStringSpan(u"한국");

  ASSERT_NO_FATAL_FAILURE(changeLocale("en-US"));
  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
  ASSERT_EQ(collator->CompareStrings(china, korea), 1);

  ASSERT_NO_FATAL_FAILURE(changeLocale("zh"));
  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);

  ASSERT_NO_FATAL_FAILURE(changeLocale("ja"));
  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);

  ASSERT_NO_FATAL_FAILURE(changeLocale("ko"));
  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
}
TEST(IntlCollator, IgnorePunctuation)
{
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  Collator::Options options{};

  // With punctuation ignored, the leading "." is skipped and "aa" < "bb".
  options.ignorePunctuation = true;
  auto optResult = collator->SetOptions(options);
  ASSERT_TRUE(optResult.isOk());
  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), -1);

  // With punctuation significant, ".bb" sorts before "aa".
  options.ignorePunctuation = false;
  optResult = collator->SetOptions(options);
  ASSERT_TRUE(optResult.isOk());
  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), 1);
}
} // namespace mozilla::intl

Просмотреть файл

@ -6,6 +6,7 @@
UNIFIED_SOURCES += [
"TestCalendar.cpp",
"TestCollator.cpp",
"TestDateTimeFormat.cpp",
"TestNumberFormat.cpp",
"TestPluralRules.cpp",

Просмотреть файл

@ -5,6 +5,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
EXPORTS.mozilla.intl = [
"src/Calendar.h",
"src/Collator.h",
"src/DateTimeFormat.h",
"src/DateTimePatternGenerator.h",
"src/ICU4CGlue.h",
@ -14,8 +15,10 @@ EXPORTS.mozilla.intl = [
UNIFIED_SOURCES += [
"src/Calendar.cpp",
"src/Collator.cpp",
"src/DateTimeFormat.cpp",
"src/DateTimePatternGenerator.cpp",
"src/ICU4CGlue.cpp",
"src/NumberFormat.cpp",
"src/NumberFormatFields.cpp",
"src/NumberFormatterSkeleton.cpp",

Просмотреть файл

@ -0,0 +1,253 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <algorithm>
#include <string.h>
#include "mozilla/intl/Collator.h"
namespace mozilla::intl {
// Wraps an already-opened UCollator. The pointer must be non-null (asserted in
// debug builds); the destructor of this class passes it to ucol_close.
Collator::Collator(UCollator* aCollator) : mCollator(aCollator) {
MOZ_ASSERT(aCollator);
}
// Closes the wrapped ICU collator, if there is one.
Collator::~Collator() {
  UCollator* const rawCollator = mCollator.GetMut();
  if (rawCollator != nullptr) {
    ucol_close(rawCollator);
  }
}
/**
 * Opens an ICU collator for aLocale and wraps it in a Collator.
 *
 * Returns ICUError::OutOfMemory when ICU reports U_MEMORY_ALLOCATION_ERROR and
 * ICUError::InternalError for any other ICU failure status.
 */
Result<UniquePtr<Collator>, ICUError> Collator::TryCreate(const char* aLocale) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator* collator = ucol_open(aLocale, &status);
  if (U_SUCCESS(status)) {
    return MakeUnique<Collator>(collator);
  }
  if (status == U_MEMORY_ALLOCATION_ERROR) {
    return Err(ICUError::OutOfMemory);
  }
  return Err(ICUError::InternalError);
}
/**
 * Compares two UTF-16 strings with the ICU collator and maps ICU's ordering
 * result onto -1 (less), 0 (equal) or 1 (greater).
 */
int32_t Collator::CompareStrings(Span<const char16_t> aSource,
                                 Span<const char16_t> aTarget) const {
  const auto sourceLength = static_cast<int32_t>(aSource.size());
  const auto targetLength = static_cast<int32_t>(aTarget.size());
  const auto order = ucol_strcoll(mCollator.GetConst(), aSource.data(),
                                  sourceLength, aTarget.data(), targetLength);
  if (order == UCOL_LESS) {
    return -1;
  }
  if (order == UCOL_EQUAL) {
    return 0;
  }
  if (order == UCOL_GREATER) {
    return 1;
  }
  MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult");
  return 0;
}
/**
 * Orders two sort keys: the common prefix is compared with strncmp, and when
 * that prefix is equal the longer key sorts later. Returns -1, 0 or 1.
 */
int32_t Collator::CompareSortKeys(Span<const uint8_t> aKey1,
                                  Span<const uint8_t> aKey2) const {
  const size_t length1 = aKey1.Length();
  const size_t length2 = aKey2.Length();
  const size_t sharedLength = std::min(length1, length2);

  const int32_t prefixOrder =
      strncmp(reinterpret_cast<const char*>(aKey1.Elements()),
              reinterpret_cast<const char*>(aKey2.Elements()), sharedLength);
  if (prefixOrder != 0) {
    return prefixOrder < 0 ? -1 : 1;
  }

  // The shared prefix is equal, so the key with extra bytes comes later.
  if (length1 == length2) {
    return 0;
  }
  return length1 > length2 ? 1 : -1;
}
// Translates Collator::CaseFirst into the matching ICU attribute value.
static UColAttributeValue CaseFirstToICU(Collator::CaseFirst aCaseFirst) {
  if (aCaseFirst == Collator::CaseFirst::False) {
    return UCOL_OFF;
  }
  if (aCaseFirst == Collator::CaseFirst::Upper) {
    return UCOL_UPPER_FIRST;
  }
  if (aCaseFirst == Collator::CaseFirst::Lower) {
    return UCOL_LOWER_FIRST;
  }
  MOZ_ASSERT_UNREACHABLE();
  return UCOL_DEFAULT;
}
// Define this as a macro to work around exposing the UColAttributeValue type to
// the header file. Collator::Feature is private to the class.
//
// Expands to a switch over `feature` (a Collator::Feature) that assigns the
// matching UCOL_ON / UCOL_OFF / UCOL_DEFAULT value to `featureICU`. If
// `feature` matches none of the cases, `featureICU` is left untouched.
#define FEATURE_TO_ICU(featureICU, feature) \
switch (feature) { \
case Collator::Feature::On: \
(featureICU) = UCOL_ON; \
break; \
case Collator::Feature::Off: \
(featureICU) = UCOL_OFF; \
break; \
case Collator::Feature::Default: \
(featureICU) = UCOL_DEFAULT; \
break; \
}
/**
 * Maps Collator::Strength onto the ICU strength value and applies it to the
 * wrapped collator with ucol_setStrength.
 */
void Collator::SetStrength(Collator::Strength aStrength) {
  // Start from ICU's default strength so the value handed to ICU is never
  // indeterminate, even if aStrength holds a value outside the enumerators.
  UColAttributeValue strength = UCOL_DEFAULT_STRENGTH;
  switch (aStrength) {
    case Collator::Strength::Default:
      strength = UCOL_DEFAULT_STRENGTH;
      break;
    case Collator::Strength::Primary:
      strength = UCOL_PRIMARY;
      break;
    case Collator::Strength::Secondary:
      strength = UCOL_SECONDARY;
      break;
    case Collator::Strength::Tertiary:
      strength = UCOL_TERTIARY;
      break;
    case Collator::Strength::Quaternary:
      strength = UCOL_QUATERNARY;
      break;
    case Collator::Strength::Identical:
      strength = UCOL_IDENTICAL;
      break;
  }
  ucol_setStrength(mCollator.GetMut(), strength);
}
/**
 * Sets ICU's UCOL_CASE_LEVEL attribute from aFeature and returns the ICU
 * status converted by ToICUResult.
 */
ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) {
  UErrorCode status = U_ZERO_ERROR;
  // Initialized so ICU never receives an indeterminate value if aFeature
  // matches none of the cases handled by FEATURE_TO_ICU.
  UColAttributeValue featureICU = UCOL_DEFAULT;
  FEATURE_TO_ICU(featureICU, aFeature);
  ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL, featureICU, &status);
  return ToICUResult(status);
}
/**
 * Sets ICU's UCOL_ALTERNATE_HANDLING attribute from aAlternateHandling and
 * returns the ICU status converted by ToICUResult.
 */
ICUResult Collator::SetAlternateHandling(
    Collator::AlternateHandling aAlternateHandling) {
  UErrorCode status = U_ZERO_ERROR;
  // Start from UCOL_DEFAULT so the value handed to ICU is never indeterminate,
  // even if aAlternateHandling holds a value outside the enumerators.
  UColAttributeValue handling = UCOL_DEFAULT;
  switch (aAlternateHandling) {
    case Collator::AlternateHandling::NonIgnorable:
      handling = UCOL_NON_IGNORABLE;
      break;
    case Collator::AlternateHandling::Shifted:
      handling = UCOL_SHIFTED;
      break;
    case Collator::AlternateHandling::Default:
      handling = UCOL_DEFAULT;
      break;
  }
  ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling,
                    &status);
  return ToICUResult(status);
}
/**
 * Sets ICU's UCOL_NUMERIC_COLLATION attribute from aFeature and returns the
 * ICU status converted by ToICUResult.
 */
ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) {
  UErrorCode status = U_ZERO_ERROR;
  // Initialized so ICU never receives an indeterminate value if aFeature
  // matches none of the cases handled by FEATURE_TO_ICU.
  UColAttributeValue featureICU = UCOL_DEFAULT;
  FEATURE_TO_ICU(featureICU, aFeature);
  ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION, featureICU,
                    &status);
  return ToICUResult(status);
}
/**
 * Sets ICU's UCOL_NORMALIZATION_MODE attribute from aFeature and returns the
 * ICU status converted by ToICUResult.
 */
ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) {
  UErrorCode status = U_ZERO_ERROR;
  // Initialized so ICU never receives an indeterminate value if aFeature
  // matches none of the cases handled by FEATURE_TO_ICU.
  UColAttributeValue featureICU = UCOL_DEFAULT;
  FEATURE_TO_ICU(featureICU, aFeature);
  ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE, featureICU,
                    &status);
  return ToICUResult(status);
}
/**
 * Sets ICU's UCOL_CASE_FIRST attribute from aCaseFirst and returns the ICU
 * status converted by ToICUResult.
 */
ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) {
  const UColAttributeValue icuCaseFirst = CaseFirstToICU(aCaseFirst);
  UErrorCode errorCode = U_ZERO_ERROR;
  ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST, icuCaseFirst,
                    &errorCode);
  return ToICUResult(errorCode);
}
/**
 * Applies an ECMA-402 style options bag to the ICU collator.
 *
 * When aPrevOptions is provided and every field matches aOptions, nothing is
 * touched and Ok() is returned. Otherwise strength, alternate handling, case
 * level, numeric collation, normalization and case-first are set in that
 * order, stopping at the first setter that reports an error.
 */
ICUResult Collator::SetOptions(const Options& aOptions,
                               const Maybe<Options&> aPrevOptions) {
  if (aPrevOptions) {
    // Check the equality of the previous options.
    const bool unchanged =
        aPrevOptions->sensitivity == aOptions.sensitivity &&
        aPrevOptions->caseFirst == aOptions.caseFirst &&
        aPrevOptions->ignorePunctuation == aOptions.ignorePunctuation &&
        aPrevOptions->numeric == aOptions.numeric;
    if (unchanged) {
      return Ok();
    }
  }

  // Translate the sensitivity into a strength plus an optional case level.
  Collator::Strength strength = Collator::Strength::Default;
  Collator::Feature caseLevel = Collator::Feature::Off;
  switch (aOptions.sensitivity) {
    case Collator::Sensitivity::Base:
      strength = Collator::Strength::Primary;
      break;
    case Collator::Sensitivity::Accent:
      strength = Collator::Strength::Secondary;
      break;
    case Collator::Sensitivity::Case:
      strength = Collator::Strength::Primary;
      caseLevel = Collator::Feature::On;
      break;
    case Collator::Sensitivity::Variant:
      strength = Collator::Strength::Tertiary;
      break;
  }
  SetStrength(strength);

  // According to the ICU team, UCOL_SHIFTED causes punctuation to be
  // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data
  // Markup Language, "shifted" causes whitespace and punctuation to be
  // ignored - that's a bit more than asked for, but there's no way to get
  // less.
  const Collator::AlternateHandling handling =
      aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted
                                 : Collator::AlternateHandling::Default;
  if (ICUResult handlingResult = this->SetAlternateHandling(handling);
      handlingResult.isErr()) {
    return handlingResult;
  }

  if (ICUResult caseLevelResult = SetCaseLevel(caseLevel);
      caseLevelResult.isErr()) {
    return caseLevelResult;
  }

  const Collator::Feature numeric =
      aOptions.numeric ? Collator::Feature::On : Collator::Feature::Off;
  if (ICUResult numericResult = SetNumericCollation(numeric);
      numericResult.isErr()) {
    return numericResult;
  }

  // Normalization is always on to meet the canonical equivalence requirement.
  if (ICUResult normalizationResult =
          SetNormalizationMode(Collator::Feature::On);
      normalizationResult.isErr()) {
    return normalizationResult;
  }

  // Last step: its result (Ok or error) is the result of the whole call.
  return SetCaseFirst(aOptions.caseFirst);
}
#undef FEATURE_TO_ICU
} // namespace mozilla::intl

Просмотреть файл

@ -0,0 +1,255 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_Collator_h_
#define intl_components_Collator_h_
#ifndef JS_STANDALONE
# include "gtest/MozGtestFriend.h"
#endif
#include "unicode/ucol.h"
#include "mozilla/intl/ICU4CGlue.h"
#include "mozilla/Result.h"
#include "mozilla/Span.h"
namespace mozilla::intl {
class Collator final {
 public:
  /**
   * Construct from a raw UCollator. This is public so that the UniquePtr can
   * access it.
   */
  explicit Collator(UCollator* aCollator);

  // Do not allow copy as this class owns the ICU resource. Move is not
  // currently implemented, but a custom move operator could be created if
  // needed.
  Collator(const Collator&) = delete;
  Collator& operator=(const Collator&) = delete;

  /**
   * Attempt to initialize a new collator.
   */
  static Result<UniquePtr<Collator>, ICUError> TryCreate(const char* aLocale);

  ~Collator();

  /**
   * Write the sort key for aString into aBuffer.
   *
   * B must expose `CharType` (required to be uint8_t), `reserve(length)`,
   * `data()` and `written(length)`. Returns ICUError::OutOfMemory when the
   * buffer cannot be reserved and ICUError::InternalError when ICU reports a
   * zero-length key.
   */
  template <typename B>
  ICUResult GetSortKey(Span<const char16_t> aString, B& aBuffer) const {
    static_assert(std::is_same_v<typename B::CharType, uint8_t>,
                  "Expected a uint8_t* buffer.");
    // Do not use FillBufferWithICUCall, as this API does not report the
    // U_BUFFER_OVERFLOW_ERROR. The return value is always the number of bytes
    // needed, regardless of whether the result buffer was big enough.
    //
    // ucol_getSortKey takes no UErrorCode argument, so the returned length is
    // the only failure signal checked here.
    const auto stringLength = static_cast<int32_t>(aString.size());

    // First pass: no output buffer, only ask for the required length.
    int32_t length = ucol_getSortKey(mCollator.GetConst(), aString.data(),
                                     stringLength, nullptr, 0);
    if (length == 0) {
      // If the length is 0, an internal error occurred according to the docs.
      return Err(ICUError::InternalError);
    }

    if (!aBuffer.reserve(length)) {
      return Err(ICUError::OutOfMemory);
    }

    // Second pass: write the key into the reserved storage.
    length = ucol_getSortKey(mCollator.GetConst(), aString.data(), stringLength,
                             aBuffer.data(), length);
    if (length == 0) {
      return Err(ICUError::InternalError);
    }

    aBuffer.written(length);
    return Ok();
  }

  /**
   * Compare two UTF-16 strings with this collator. Returns -1, 0, or 1.
   */
  int32_t CompareStrings(Span<const char16_t> aSource,
                         Span<const char16_t> aTarget) const;

  /**
   * Compare two sort keys. Returns -1, 0, or 1.
   */
  int32_t CompareSortKeys(Span<const uint8_t> aKey1,
                          Span<const uint8_t> aKey2) const;

  /**
   * Determine how casing affects sorting. These options map to ECMA 402
   * collator options.
   *
   * https://tc39.es/ecma402/#sec-initializecollator
   */
  enum class CaseFirst {
    // Sort upper case first.
    Upper,
    // Sort lower case first.
    Lower,
    // Orders upper and lower case letters in accordance to their tertiary
    // weights.
    False,
  };

  /**
   * Which differences in the strings should lead to differences in collation
   * comparisons.
   *
   * This setting needs to be ECMA 402 compliant.
   * https://tc39.es/ecma402/#sec-collator-comparestrings
   */
  enum class Sensitivity {
    // Only strings that differ in base letters compare as unequal.
    // Examples: a ≠ b, a = á, a = A.
    Base,
    // Only strings that differ in base letters or accents and other diacritic
    // marks compare as unequal.
    // Examples: a ≠ b, a ≠ á, a = A.
    Accent,
    // Only strings that differ in base letters or case compare as unequal.
    // Examples: a ≠ b, a = á, a ≠ A.
    Case,
    // Strings that differ in base letters, accents and other diacritic marks,
    // or case compare as unequal. Other differences may also be taken into
    // consideration.
    // Examples: a ≠ b, a ≠ á, a ≠ A.
    Variant,
  };

  /**
   * These options map to ECMA 402 collator options. Make sure the defaults map
   * to the default initialized values of ECMA 402.
   *
   * https://tc39.es/ecma402/#sec-initializecollator
   */
  struct Options {
    Sensitivity sensitivity = Sensitivity::Variant;
    CaseFirst caseFirst = CaseFirst::False;
    bool ignorePunctuation = false;
    bool numeric = false;
  };

  /**
   * Change the configuration of the options. When aPrevOptions is provided and
   * equal to aOptions, the collator is left untouched.
   */
  ICUResult SetOptions(const Options& aOptions,
                       const Maybe<Options&> aPrevOptions = Nothing());

 private:
  /**
   * Toggle features, or use the default setting.
   */
  enum class Feature {
    // Turn the feature on.
    On,
    // Turn the feature off.
    Off,
    // Use the default setting for the feature.
    Default,
  };

  /**
   * Attribute for handling variable elements.
   */
  enum class AlternateHandling {
    // Treats all the codepoints with non-ignorable primary weights in the
    // same way (default)
    NonIgnorable,
    // Causes codepoints with primary weights that are equal or below the
    // variable top value to be ignored on primary level and moved to the
    // quaternary level.
    Shifted,
    // Use the default setting for the attribute.
    Default,
  };

  /**
   * The strength attribute.
   *
   * The usual strength for most locales (except Japanese) is tertiary.
   *
   * Quaternary strength is useful when combined with shifted setting for
   * alternate handling attribute and for JIS X 4061 collation, when it is used
   * to distinguish between Katakana and Hiragana. Otherwise, quaternary level
   * is affected only by the number of non-ignorable code points in the string.
   *
   * Identical strength is rarely useful, as it amounts to codepoints of the NFD
   * form of the string.
   */
  enum class Strength {
    // Primary collation strength.
    Primary,
    // Secondary collation strength.
    Secondary,
    // Tertiary collation strength.
    Tertiary,
    // Quaternary collation strength.
    Quaternary,
    // Identical collation strength.
    Identical,
    // Use the default strength.
    Default,
  };

  /**
   * Configure the Collator::Strength
   */
  void SetStrength(Strength strength);

  /**
   * Configure Collator::AlternateHandling.
   */
  ICUResult SetAlternateHandling(AlternateHandling aAlternateHandling);

  /**
   * Controls whether an extra case level (positioned before the third level) is
   * generated or not.
   *
   * Contents of the case level are affected by the value of CaseFirst
   * attribute. A simple way to ignore accent differences in a string is to set
   * the strength to Primary and enable case level.
   */
  ICUResult SetCaseLevel(Feature aFeature);

  /**
   * When turned on, this attribute makes substrings of digits sort according to
   * their numeric values.
   *
   * This is a way to get '100' to sort AFTER '2'. Note that the longest digit
   * substring that can be treated as a single unit is 254 digits (not counting
   * leading zeros). If a digit substring is longer than that, the digits beyond
   * the limit will be treated as a separate digit substring.
   *
   * A "digit" in this sense is a code point with General_Category=Nd, which
   * does not include circled numbers, roman numerals, etc. Only a contiguous
   * digit substring is considered, that is, non-negative integers without
   * separators. There is no support for plus/minus signs, decimals, exponents,
   * etc.
   */
  ICUResult SetNumericCollation(Feature aFeature);

  /**
   * Controls whether the normalization check and necessary normalizations are
   * performed.
   *
   * When off (default), no normalization check is performed. The correctness of
   * the result is guaranteed only if the input data is in so-called FCD form.
   * When set to on, an incremental check is performed to see whether the input
   * data is in the FCD form. If the data is not in the FCD form, incremental
   * NFD normalization is performed.
   */
  ICUResult SetNormalizationMode(Feature aFeature);

  /**
   * Configure Collator::CaseFirst.
   */
  ICUResult SetCaseFirst(CaseFirst aCaseFirst);

#ifndef JS_STANDALONE
  FRIEND_TEST(IntlCollator, SetAttributesInternal);
#endif

  // The wrapped ICU collator.
  ICUPointer<UCollator> mCollator = ICUPointer<UCollator>(nullptr);
  // NOTE(review): no inline member of this class reads or writes this field;
  // confirm whether it is still needed.
  Maybe<Sensitivity> mLastStrategy = Nothing();
};
} // namespace mozilla::intl
#endif

Просмотреть файл

@ -0,0 +1,19 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/intl/ICU4CGlue.h"
namespace mozilla::intl {
// Maps an ICU status code onto ICUResult: success statuses become Ok(),
// U_MEMORY_ALLOCATION_ERROR becomes ICUError::OutOfMemory, and every other
// failure becomes ICUError::InternalError.
ICUResult ToICUResult(UErrorCode status) {
  if (!U_SUCCESS(status)) {
    return Err(status == U_MEMORY_ALLOCATION_ERROR ? ICUError::OutOfMemory
                                                   : ICUError::InternalError);
  }
  return Ok();
}
} // namespace mozilla::intl

Просмотреть файл

@ -27,6 +27,11 @@ struct InternalError {};
using ICUResult = Result<Ok, ICUError>;
/**
* Convert a UErrorCode to ICUResult.
*/
ICUResult ToICUResult(UErrorCode status);
/**
* The ICU status can complain about a string not being terminated, but this
* is fine for this API, as it deals with the mozilla::Span that has a pointer
@ -36,6 +41,28 @@ static inline bool ICUSuccessForStringSpan(UErrorCode status) {
return U_SUCCESS(status) || status == U_STRING_NOT_TERMINATED_WARNING;
}
/**
 * Holder for a raw ICU pointer that hands it out either as a const or as a
 * mutable pointer. Pick the accessor that matches the const-ness of the ICU
 * API being called, so that const-ness propagates correctly into the
 * mozilla::intl APIs.
 *
 * The class has no destructor of its own: releasing the ICU object is left to
 * whoever holds the ICUPointer. The defaulted move operations copy the raw
 * pointer and leave the moved-from holder unchanged.
 */
template <typename T>
class ICUPointer {
 public:
  explicit ICUPointer(T* aPointer) : mPointer{aPointer} {}

  // Movable but not copyable: declaring the move operations suppresses the
  // implicit copy operations.
  ICUPointer(ICUPointer&& aOther) noexcept = default;
  ICUPointer& operator=(ICUPointer&& aOther) noexcept = default;

  // Read-only view, usable on a const ICUPointer.
  const T* GetConst() const { return mPointer; }

  // Mutable view, only available on a non-const ICUPointer.
  T* GetMut() { return mPointer; }

 private:
  T* mPointer;
};
/**
* Calling into ICU with the C-API can be a bit tricky. This function wraps up
* the relatively risky operations involving pointers, lengths, and buffers into