Bug 208789 - patch 1 - Create an intl::GeneralCategory enum for UnicodeProperties::CharType() to return, to avoid directly referring to ICU4C constants or mapping via harfbuzz constants. r=platform-i18n-reviewers,nordzilla

No change in behavior; this just gives us our own version of the general category constants,
so we can avoid depending on ICU's constants elsewhere in the codebase.

Differential Revision: https://phabricator.services.mozilla.com/D173203
This commit is contained in:
Jonathan Kew 2023-03-23 13:54:33 +00:00
Родитель ee4bdd5b2e
Коммит b3fc2def82
4 изменённых файлов: 63 добавлений и 3 удалений

Просмотреть файл

@ -16,6 +16,7 @@ EXPORTS.mozilla.intl = [
"src/DateTimePatternGenerator.h",
"src/DisplayNames.h",
"src/FormatBuffer.h",
"src/GeneralCategory.h",
"src/ICU4CGlue.h",
"src/ICU4CLibrary.h",
"src/ICUError.h",

Просмотреть файл

@ -0,0 +1,52 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_GeneralCategory_h_
#define intl_components_GeneralCategory_h_
#include <cstdint>
namespace mozilla::intl {
// Unicode General_Category values, as a strongly-typed enum.
//
// The two-letter abbreviation noted beside each enumerator is the short
// property value alias listed in UAX #44:
//   https://www.unicode.org/reports/tr44/#General_Category_Values
//
// IMPORTANT: the numeric value of every enumerator must stay identical to the
// corresponding ICU UCharCategory constant, so the order below follows ICU's
// numbering rather than the order used in the Unicode tables.
enum class GeneralCategory : uint8_t {
  // Not assigned to any character.
  Unassigned = 0,  // Cn

  // Letters.
  Uppercase_Letter = 1,  // Lu
  Lowercase_Letter = 2,  // Ll
  Titlecase_Letter = 3,  // Lt
  Modifier_Letter = 4,   // Lm
  Other_Letter = 5,      // Lo

  // Marks.
  Nonspacing_Mark = 6,  // Mn
  Enclosing_Mark = 7,   // Me
  Spacing_Mark = 8,     // Mc

  // Numbers.
  Decimal_Number = 9,  // Nd
  Letter_Number = 10,  // Nl
  Other_Number = 11,   // No

  // Separators.
  Space_Separator = 12,      // Zs
  Line_Separator = 13,       // Zl
  Paragraph_Separator = 14,  // Zp

  // Other (control, format, private use, surrogate).
  Control = 15,      // Cc
  Format = 16,       // Cf
  Private_Use = 17,  // Co
  Surrogate = 18,    // Cs

  // Punctuation (first group).
  Dash_Punctuation = 19,       // Pd
  Open_Punctuation = 20,       // Ps
  Close_Punctuation = 21,      // Pe
  Connector_Punctuation = 22,  // Pc
  Other_Punctuation = 23,      // Po

  // Symbols.
  Math_Symbol = 24,      // Sm
  Currency_Symbol = 25,  // Sc
  Modifier_Symbol = 26,  // Sk
  Other_Symbol = 27,     // So

  // Punctuation (second group; numbered after the symbols).
  Initial_Punctuation = 28,  // Pi
  Final_Punctuation = 29,    // Pf

  // Implicitly one past the last category (30): the number of categories
  // defined above, not a category in its own right.
  GeneralCategoryCount
};
} // namespace mozilla::intl
#endif

Просмотреть файл

@ -5,6 +5,7 @@
#define intl_components_UnicodeProperties_h_
#include "mozilla/intl/BidiClass.h"
#include "mozilla/intl/GeneralCategory.h"
#include "mozilla/intl/ICU4CGlue.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "mozilla/Vector.h"
@ -34,7 +35,9 @@ class UnicodeProperties final {
/**
* Return the general category value for the code point.
*/
static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); }
static inline GeneralCategory CharType(uint32_t aCh) {
return GeneralCategory(u_charType(aCh));
}
/**
* Determine whether the code point has the Bidi_Mirrored property.
@ -222,7 +225,8 @@ class UnicodeProperties final {
*/
static inline bool IsMathOrMusicSymbol(uint32_t aCh) {
// Keep this function in sync with is_math_symbol in base_chars.py.
return CharType(aCh) == U_MATH_SYMBOL || CharType(aCh) == U_OTHER_SYMBOL;
return CharType(aCh) == GeneralCategory::Math_Symbol ||
CharType(aCh) == GeneralCategory::Other_Symbol;
}
static inline Script GetScriptCode(uint32_t aCh) {

Просмотреть файл

@ -64,8 +64,11 @@ const uint32_t kEmojiSkinToneLast = 0x1f3ff;
extern const hb_unicode_general_category_t sICUtoHBcategory[];
// NOTE: This returns values matching harfbuzz HB_UNICODE_GENERAL_CATEGORY_*
// constants, NOT the mozilla::intl::GeneralCategory enum.
// For the GeneralCategory enum, use intl::UnicodeProperties::CharType itself.
inline uint8_t GetGeneralCategory(uint32_t aCh) {
return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)];
return sICUtoHBcategory[unsigned(intl::UnicodeProperties::CharType(aCh))];
}
inline int8_t GetNumericValue(uint32_t aCh) {