From b3fc2def82669bb0fd5dc9256b53cfc5a05f6696 Mon Sep 17 00:00:00 2001 From: Jonathan Kew Date: Thu, 23 Mar 2023 13:54:33 +0000 Subject: [PATCH] Bug 208789 - patch 1 - Create an intl::GeneralCategory enum for UnicodeProperties::CharType() to return, to avoid directly referring to ICU4C constants or mapping via harfbuzz constants. r=platform-i18n-reviewers,nordzilla No change in behavior; this just gives us our own version of the general category constants, so we can avoid depending on ICU's constants elsewhere in the codebase. Differential Revision: https://phabricator.services.mozilla.com/D173203 --- intl/components/moz.build | 1 + intl/components/src/GeneralCategory.h | 52 +++++++++++++++++++++ intl/components/src/UnicodeProperties.h | 8 +++- intl/unicharutil/util/nsUnicodeProperties.h | 5 +- 4 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 intl/components/src/GeneralCategory.h diff --git a/intl/components/moz.build b/intl/components/moz.build index 65fe7ddc927a..80230af0eab7 100644 --- a/intl/components/moz.build +++ b/intl/components/moz.build @@ -16,6 +16,7 @@ EXPORTS.mozilla.intl = [ "src/DateTimePatternGenerator.h", "src/DisplayNames.h", "src/FormatBuffer.h", + "src/GeneralCategory.h", "src/ICU4CGlue.h", "src/ICU4CLibrary.h", "src/ICUError.h", diff --git a/intl/components/src/GeneralCategory.h b/intl/components/src/GeneralCategory.h new file mode 100644 index 000000000000..99603ce4b961 --- /dev/null +++ b/intl/components/src/GeneralCategory.h @@ -0,0 +1,52 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef intl_components_GeneralCategory_h_ +#define intl_components_GeneralCategory_h_ + +#include <cstdint> + +namespace mozilla::intl { + +// See https://www.unicode.org/reports/tr44/#General_Category_Values +// for details of these values.
+ +// The values here must match the values used by ICU's UCharCategory. + +enum class GeneralCategory : uint8_t { + Unassigned = 0, + Uppercase_Letter = 1, + Lowercase_Letter = 2, + Titlecase_Letter = 3, + Modifier_Letter = 4, + Other_Letter = 5, + Nonspacing_Mark = 6, + Enclosing_Mark = 7, + Spacing_Mark = 8, + Decimal_Number = 9, + Letter_Number = 10, + Other_Number = 11, + Space_Separator = 12, + Line_Separator = 13, + Paragraph_Separator = 14, + Control = 15, + Format = 16, + Private_Use = 17, + Surrogate = 18, + Dash_Punctuation = 19, + Open_Punctuation = 20, + Close_Punctuation = 21, + Connector_Punctuation = 22, + Other_Punctuation = 23, + Math_Symbol = 24, + Currency_Symbol = 25, + Modifier_Symbol = 26, + Other_Symbol = 27, + Initial_Punctuation = 28, + Final_Punctuation = 29, + GeneralCategoryCount +}; + +} // namespace mozilla::intl + +#endif diff --git a/intl/components/src/UnicodeProperties.h b/intl/components/src/UnicodeProperties.h index 785bc356f8e6..7fd64e099e06 100644 --- a/intl/components/src/UnicodeProperties.h +++ b/intl/components/src/UnicodeProperties.h @@ -5,6 +5,7 @@ #define intl_components_UnicodeProperties_h_ #include "mozilla/intl/BidiClass.h" +#include "mozilla/intl/GeneralCategory.h" #include "mozilla/intl/ICU4CGlue.h" #include "mozilla/intl/UnicodeScriptCodes.h" #include "mozilla/Vector.h" @@ -34,7 +35,9 @@ class UnicodeProperties final { /** * Return the general category value for the code point. */ - static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); } + static inline GeneralCategory CharType(uint32_t aCh) { + return GeneralCategory(u_charType(aCh)); + } /** * Determine whether the code point has the Bidi_Mirrored property. @@ -222,7 +225,8 @@ class UnicodeProperties final { */ static inline bool IsMathOrMusicSymbol(uint32_t aCh) { // Keep this function in sync with is_math_symbol in base_chars.py. 
- return CharType(aCh) == U_MATH_SYMBOL || CharType(aCh) == U_OTHER_SYMBOL; + return CharType(aCh) == GeneralCategory::Math_Symbol || + CharType(aCh) == GeneralCategory::Other_Symbol; } static inline Script GetScriptCode(uint32_t aCh) { diff --git a/intl/unicharutil/util/nsUnicodeProperties.h b/intl/unicharutil/util/nsUnicodeProperties.h index 6f3c9dbaf893..1b81e18033f9 100644 --- a/intl/unicharutil/util/nsUnicodeProperties.h +++ b/intl/unicharutil/util/nsUnicodeProperties.h @@ -64,8 +64,11 @@ const uint32_t kEmojiSkinToneLast = 0x1f3ff; extern const hb_unicode_general_category_t sICUtoHBcategory[]; +// NOTE: This returns values matching harfbuzz HB_UNICODE_GENERAL_CATEGORY_* +// constants, NOT the mozilla::intl::GeneralCategory enum. +// For the GeneralCategory enum, use intl::UnicodeProperties::CharType itself. inline uint8_t GetGeneralCategory(uint32_t aCh) { - return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)]; + return sICUtoHBcategory[unsigned(intl::UnicodeProperties::CharType(aCh))]; } inline int8_t GetNumericValue(uint32_t aCh) {