From c0ebed22d3410e50de2f309f24c4fcadd0a152f5 Mon Sep 17 00:00:00 2001 From: Dan Minor Date: Fri, 3 Dec 2021 20:49:31 +0000 Subject: [PATCH] Bug 1719554 - Unify most of nsUnicodeProperties.h; r=platform-i18n-reviewers,jfkthame,gregtatum,necko-reviewers,valentin This unifies most of the calls in nsUnicodeProperties.h. CharType and Script will be handled in subsequent patches on this bug. Differential Revision: https://phabricator.services.mozilla.com/D132273 --- dom/base/DirectionalityUtils.cpp | 1 + dom/serializers/nsPlainTextSerializer.cpp | 5 +- gfx/thebes/gfxHarfBuzzShaper.cpp | 10 +- gfx/thebes/gfxScriptItemizer.cpp | 7 +- intl/components/moz.build | 1 + intl/components/src/UnicodeProperties.h | 219 ++++++++++++++++++ intl/lwbrk/LineBreaker.cpp | 19 +- intl/unicharutil/util/nsUnicharUtils.cpp | 19 +- intl/unicharutil/util/nsUnicodeProperties.cpp | 3 +- intl/unicharutil/util/nsUnicodeProperties.h | 103 ++------ netwerk/dns/nsIDNService.cpp | 4 +- toolkit/components/find/nsFind.cpp | 3 +- .../places/tests/gtest/test_casing.cpp | 6 +- 13 files changed, 286 insertions(+), 114 deletions(-) create mode 100644 intl/components/src/UnicodeProperties.h diff --git a/dom/base/DirectionalityUtils.cpp b/dom/base/DirectionalityUtils.cpp index 40c24454317d..83e77b777e60 100644 --- a/dom/base/DirectionalityUtils.cpp +++ b/dom/base/DirectionalityUtils.cpp @@ -215,6 +215,7 @@ #include "mozilla/dom/Element.h" #include "mozilla/dom/HTMLSlotElement.h" #include "mozilla/dom/ShadowRoot.h" +#include "mozilla/intl/UnicodeProperties.h" #include "nsUnicodeProperties.h" #include "nsTextFragment.h" #include "nsAttrValue.h" diff --git a/dom/serializers/nsPlainTextSerializer.cpp b/dom/serializers/nsPlainTextSerializer.cpp index 318e93517504..508bef3c152f 100644 --- a/dom/serializers/nsPlainTextSerializer.cpp +++ b/dom/serializers/nsPlainTextSerializer.cpp @@ -22,7 +22,6 @@ #include "nsContentUtils.h" #include "nsReadableUtils.h" #include "nsUnicharUtils.h" -#include "nsUnicodeProperties.h" #include "nsCRT.h" #include "mozilla/Casting.h" #include "mozilla/EditorUtils.h" @@ -31,6 +30,8 @@ #include "mozilla/dom/HTMLBRElement.h" #include "mozilla/dom/Text.h" #include "mozilla/intl/Segmenter.h" +#include "mozilla/intl/UnicodeProperties.h" +#include "nsUnicodeProperties.h" #include "mozilla/Span.h" #include "mozilla/Preferences.h" #include "mozilla/StaticPrefs_converter.h" @@ -1803,7 +1804,7 @@ int32_t GetUnicharWidth(char32_t aCh) { return 1; } - return unicode::IsEastAsianWidthFW(aCh) ? 2 : 1; + return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1; } int32_t GetUnicharStringWidth(Span aString) { diff --git a/gfx/thebes/gfxHarfBuzzShaper.cpp b/gfx/thebes/gfxHarfBuzzShaper.cpp index e80a7d1d6a76..7aabd9fb8865 100644 --- a/gfx/thebes/gfxHarfBuzzShaper.cpp +++ b/gfx/thebes/gfxHarfBuzzShaper.cpp @@ -11,6 +11,7 @@ #include "gfxTextRun.h" #include "mozilla/Sprintf.h" #include "mozilla/intl/String.h" +#include "mozilla/intl/UnicodeProperties.h" #include "nsUnicodeProperties.h" #include "nsUnicodeScriptCodes.h" @@ -981,7 +982,7 @@ static hb_position_t HBGetHKerning(hb_font_t* font, void* font_data, static hb_codepoint_t HBGetMirroring(hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) { - return GetMirroredChar(aCh); + return intl::UnicodeProperties::CharMirror(aCh); } static hb_unicode_general_category_t HBGetGeneralCategory( @@ -996,13 +997,14 @@ static hb_script_t HBGetScript(hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, static hb_unicode_combining_class_t HBGetCombiningClass( hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) { - return hb_unicode_combining_class_t(GetCombiningClass(aCh)); + return hb_unicode_combining_class_t( + intl::UnicodeProperties::GetCombiningClass(aCh)); } static hb_bool_t HBUnicodeCompose(hb_unicode_funcs_t* ufuncs, hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t* ab, void* user_data) { - char32_t ch = mozilla::intl::String::ComposePairNFC(a, b); + char32_t ch = intl::String::ComposePairNFC(a, b); if (ch > 0) { *ab = ch; return true; @@ -1025,7 +1027,7 @@ static hb_bool_t HBUnicodeDecompose(hb_unicode_funcs_t* ufuncs, #endif char32_t decomp[2] = {0}; - if (mozilla::intl::String::DecomposeRawNFD(ab, decomp)) { + if (intl::String::DecomposeRawNFD(ab, decomp)) { if (decomp[1] || decomp[0] != ab) { *a = decomp[0]; *b = decomp[1]; diff --git a/gfx/thebes/gfxScriptItemizer.cpp b/gfx/thebes/gfxScriptItemizer.cpp index fe85f1e87aa1..f2456b7b7df9 100644 --- a/gfx/thebes/gfxScriptItemizer.cpp +++ b/gfx/thebes/gfxScriptItemizer.cpp @@ -49,6 +49,7 @@ #include "gfxScriptItemizer.h" #include "mozilla/intl/Script.h" +#include "mozilla/intl/UnicodeProperties.h" #include "nsUnicodeProperties.h" #include "nsCharTraits.h" #include "harfbuzz/hb.h" @@ -177,12 +178,12 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, */ gc = GetGeneralCategory(ch); if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) { - uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch); + uint32_t endPairChar = mozilla::intl::UnicodeProperties::CharMirror(ch); if (endPairChar != ch) { push(endPairChar, scriptCode); } } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && - HasMirroredChar(ch)) { + mozilla::intl::UnicodeProperties::IsMirrored(ch)) { while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) { pop(); } @@ -220,7 +221,7 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, * pop the matching open character from the stack */ if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && - HasMirroredChar(ch)) { + mozilla::intl::UnicodeProperties::IsMirrored(ch)) { pop(); } } else { diff --git a/intl/components/moz.build b/intl/components/moz.build index 4f0a5328bc1e..760a337c5ac3 100644 --- a/intl/components/moz.build +++ b/intl/components/moz.build @@ -30,6 +30,7 @@ EXPORTS.mozilla.intl = [ "src/Script.h", "src/String.h", "src/TimeZone.h", + "src/UnicodeProperties.h", ] UNIFIED_SOURCES += [ diff --git a/intl/components/src/UnicodeProperties.h b/intl/components/src/UnicodeProperties.h new file mode 100644 index 000000000000..051c4647291c --- /dev/null +++ b/intl/components/src/UnicodeProperties.h @@ -0,0 +1,219 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef intl_components_UnicodeProperties_h_ +#define intl_components_UnicodeProperties_h_ + +#include "unicode/uchar.h" +#include "unicode/uscript.h" + +namespace mozilla::intl { + +/** + * This component is a Mozilla-focused API for working with text properties. + */ +class UnicodeProperties final { + public: + /** + * Maps the specified character to a "mirror-image" character. + */ + static inline uint32_t CharMirror(uint32_t aCh) { return u_charMirror(aCh); } + + /** + * Return the general category value for the code point. + */ + static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); } + + /** + * Determine whether the code point has the Bidi_Mirrored property. + */ + static inline bool IsMirrored(uint32_t aCh) { return u_isMirrored(aCh); } + + /** + * Returns the combining class of the code point as specified in + * UnicodeData.txt. + */ + static inline uint8_t GetCombiningClass(uint32_t aCh) { + return u_getCombiningClass(aCh); + } + + enum class IntProperty { + BidiPairedBracketType, + EastAsianWidth, + HangulSyllableType, + LineBreak, + NumericType, + }; + + /** + * Get the property value for an enumerated or integer Unicode property for a + * code point. + */ + static inline int32_t GetIntPropertyValue(uint32_t aCh, IntProperty aProp) { + UProperty prop; + switch (aProp) { + case IntProperty::BidiPairedBracketType: + prop = UCHAR_BIDI_PAIRED_BRACKET_TYPE; + break; + case IntProperty::EastAsianWidth: + prop = UCHAR_EAST_ASIAN_WIDTH; + break; + case IntProperty::HangulSyllableType: + prop = UCHAR_HANGUL_SYLLABLE_TYPE; + break; + case IntProperty::LineBreak: + prop = UCHAR_LINE_BREAK; + break; + case IntProperty::NumericType: + prop = UCHAR_NUMERIC_TYPE; + break; + } + return u_getIntPropertyValue(aCh, prop); + } + + /** + * Get the numeric value for a Unicode code point as defined in the + * Unicode Character Database if the input is decimal or a digit, + * otherwise, returns -1. + */ + static inline int8_t GetNumericValue(uint32_t aCh) { + UNumericType type = + UNumericType(GetIntPropertyValue(aCh, IntProperty::NumericType)); + return type == U_NT_DECIMAL || type == U_NT_DIGIT + ? int8_t(u_getNumericValue(aCh)) + : -1; + } + + /** + * Maps the specified character to its paired bracket character. + */ + static inline uint32_t GetBidiPairedBracket(uint32_t aCh) { + return u_getBidiPairedBracket(aCh); + } + + /** + * The given character is mapped to its uppercase equivalent according to + * UnicodeData.txt; if the character has no uppercase equivalent, the + * character itself is returned. + */ + static inline uint32_t ToUpper(uint32_t aCh) { return u_toupper(aCh); } + + /** + * The given character is mapped to its lowercase equivalent according to + * UnicodeData.txt; if the character has no lowercase equivalent, the + * character itself is returned. + */ + static inline uint32_t ToLower(uint32_t aCh) { return u_tolower(aCh); } + + /** + * Check if a code point has the Lowercase Unicode property. + */ + static inline bool IsLowercase(uint32_t aCh) { return u_isULowercase(aCh); } + + /** + * The given character is mapped to its titlecase equivalent according to + * UnicodeData.txt; if the character has no titlecase equivalent, the + * character itself is returned. + */ + static inline uint32_t ToTitle(uint32_t aCh) { return u_totitle(aCh); } + + /** + * The given character is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; + * if the character has no case folding equivalent, the character + * itself is returned. + */ + static inline uint32_t FoldCase(uint32_t aCh) { + return u_foldCase(aCh, U_FOLD_CASE_DEFAULT); + } + + enum class BinaryProperty { + DefaultIgnorableCodePoint, + Emoji, + EmojiPresentation, + }; + + /** + * Check a binary Unicode property for a code point. + */ + static inline bool HasBinaryProperty(uint32_t aCh, BinaryProperty aProp) { + UProperty prop; + switch (aProp) { + case BinaryProperty::DefaultIgnorableCodePoint: + prop = UCHAR_DEFAULT_IGNORABLE_CODE_POINT; + break; + case BinaryProperty::Emoji: + prop = UCHAR_EMOJI; + break; + case BinaryProperty::EmojiPresentation: + prop = UCHAR_EMOJI_PRESENTATION; + break; + } + return u_hasBinaryProperty(aCh, prop); + } + + /** + * Check if the width of aCh is full width, half width or wide + * excluding emoji. + */ + static inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) { + switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) { + case U_EA_FULLWIDTH: + case U_EA_HALFWIDTH: + return true; + case U_EA_WIDE: + return HasBinaryProperty(aCh, BinaryProperty::Emoji) ? false : true; + case U_EA_AMBIGUOUS: + case U_EA_NARROW: + case U_EA_NEUTRAL: + return false; + } + return false; + } + + /** + * Check if the width of aCh is ambiguous, full width, or wide. + */ + static inline bool IsEastAsianWidthAFW(uint32_t aCh) { + switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) { + case U_EA_AMBIGUOUS: + case U_EA_FULLWIDTH: + case U_EA_WIDE: + return true; + case U_EA_HALFWIDTH: + case U_EA_NARROW: + case U_EA_NEUTRAL: + return false; + } + return false; + } + + /** + * Check if the width of aCh is full width, or wide. + */ + static inline bool IsEastAsianWidthFW(uint32_t aCh) { + switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) { + case U_EA_FULLWIDTH: + case U_EA_WIDE: + return true; + case U_EA_AMBIGUOUS: + case U_EA_HALFWIDTH: + case U_EA_NARROW: + case U_EA_NEUTRAL: + return false; + } + return false; + } + + /** + * Check if the CharType of aCh is math or other symbol. + */ + static inline bool IsMathOrMusicSymbol(uint32_t aCh) { + // Keep this function in sync with is_math_symbol in base_chars.py. + return CharType(aCh) == U_MATH_SYMBOL || CharType(aCh) == U_OTHER_SYMBOL; + } +}; + +} // namespace mozilla::intl + +#endif diff --git a/intl/lwbrk/LineBreaker.cpp b/intl/lwbrk/LineBreaker.cpp index 60352f92c381..45c073b7bb3f 100644 --- a/intl/lwbrk/LineBreaker.cpp +++ b/intl/lwbrk/LineBreaker.cpp @@ -11,6 +11,7 @@ #include "nsUnicodeProperties.h" #include "mozilla/ArrayUtils.h" #include "mozilla/intl/Segmenter.h" +#include "mozilla/intl/UnicodeProperties.h" using namespace mozilla::unicode; using namespace mozilla::intl; @@ -462,10 +463,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel, return CLASS_CLOSE_LIKE_CHARACTER; } if (aIsChineseOrJapanese) { - if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) { + if (cls == U_LB_POSTFIX_NUMERIC && + UnicodeProperties::IsEastAsianWidthAFW(u)) { return CLASS_CLOSE_LIKE_CHARACTER; } - if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) { + if (cls == U_LB_PREFIX_NUMERIC && + UnicodeProperties::IsEastAsianWidthAFW(u)) { return CLASS_OPEN_LIKE_CHARACTER; } if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) { @@ -485,10 +488,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel, return CLASS_CLOSE_LIKE_CHARACTER; } if (aIsChineseOrJapanese) { - if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) { + if (cls == U_LB_POSTFIX_NUMERIC && + UnicodeProperties::IsEastAsianWidthAFW(u)) { return CLASS_CLOSE_LIKE_CHARACTER; } - if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) { + if (cls == U_LB_PREFIX_NUMERIC && + UnicodeProperties::IsEastAsianWidthAFW(u)) { return CLASS_OPEN_LIKE_CHARACTER; } if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) { @@ -513,10 +518,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel, u == 0xFF01 || u == 0xFF1F) { return CLASS_BREAKABLE; } - if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) { + if (cls == U_LB_POSTFIX_NUMERIC && + UnicodeProperties::IsEastAsianWidthAFW(u)) { return CLASS_BREAKABLE; } - if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) { + if (cls == U_LB_PREFIX_NUMERIC && + UnicodeProperties::IsEastAsianWidthAFW(u)) { return CLASS_BREAKABLE; } if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) { diff --git a/intl/unicharutil/util/nsUnicharUtils.cpp b/intl/unicharutil/util/nsUnicharUtils.cpp index fb4c4f8d2c6e..7121bee043b8 100644 --- a/intl/unicharutil/util/nsUnicharUtils.cpp +++ b/intl/unicharutil/util/nsUnicharUtils.cpp @@ -8,6 +8,7 @@ #include "nsUnicodeProperties.h" #include "mozilla/Likely.h" #include "mozilla/HashFunctions.h" +#include "mozilla/intl/UnicodeProperties.h" // We map x -> x, except for upper-case letters, // which we map to their lower-case equivalents. @@ -33,7 +34,7 @@ static MOZ_ALWAYS_INLINE uint32_t ToLowerCase_inline(uint32_t aChar) { return gASCIIToLower[aChar]; } - return mozilla::unicode::GetLowercase(aChar); + return mozilla::intl::UnicodeProperties::ToLower(aChar); } static MOZ_ALWAYS_INLINE uint32_t @@ -244,7 +245,8 @@ void ToLowerCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) { for (uint32_t i = 0; i < aLen; i++) { uint32_t ch = aIn[i]; if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) { - ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); + ch = mozilla::intl::UnicodeProperties::ToLower( + SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); @@ -269,14 +271,15 @@ uint32_t ToUpperCase(uint32_t aChar) { return aChar; } - return mozilla::unicode::GetUppercase(aChar); + return mozilla::intl::UnicodeProperties::ToUpper(aChar); } void ToUpperCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) { for (uint32_t i = 0; i < aLen; i++) { uint32_t ch = aIn[i]; if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) { - ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); + ch = mozilla::intl::UnicodeProperties::ToUpper( + SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); @@ -362,7 +365,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline( // we don't go through ToLowerCase here, because we know this isn't // an ASCII character so the ASCII fast-path there is useless - c = mozilla::unicode::GetLowercase(c); + c = mozilla::intl::UnicodeProperties::ToLower(c); *aNext = aStr + 2; return c; @@ -377,7 +380,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline( c += (str[1] & 0x3F) << 6; c += (str[2] & 0x3F); - c = mozilla::unicode::GetLowercase(c); + c = mozilla::intl::UnicodeProperties::ToLower(c); *aNext = aStr + 3; return c; @@ -392,7 +395,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline( c += (str[2] & 0x3F) << 6; c += (str[3] & 0x3F); - c = mozilla::unicode::GetLowercase(c); + c = mozilla::intl::UnicodeProperties::ToLower(c); *aNext = aStr + 4; return c; @@ -514,7 +517,7 @@ uint32_t HashUTF8AsUTF16(const char* aUTF8, uint32_t aLength, bool* aErr) { } bool IsSegmentBreakSkipChar(uint32_t u) { - return unicode::IsEastAsianWidthFHWexcludingEmoji(u) && + return intl::UnicodeProperties::IsEastAsianWidthFHWexcludingEmoji(u) && unicode::GetScriptCode(u) != unicode::Script::HANGUL; } diff --git a/intl/unicharutil/util/nsUnicodeProperties.cpp b/intl/unicharutil/util/nsUnicodeProperties.cpp index 942f65b2da41..69edf03e4206 100644 --- a/intl/unicharutil/util/nsUnicodeProperties.cpp +++ b/intl/unicharutil/util/nsUnicodeProperties.cpp @@ -177,7 +177,8 @@ enum HSType { }; static HSType GetHangulSyllableType(uint32_t aCh) { - return HSType(u_getIntPropertyValue(aCh, UCHAR_HANGUL_SYLLABLE_TYPE)); + return HSType(intl::UnicodeProperties::GetIntPropertyValue( + aCh, intl::UnicodeProperties::IntProperty::HangulSyllableType)); } void ClusterIterator::Next() { diff --git a/intl/unicharutil/util/nsUnicodeProperties.h b/intl/unicharutil/util/nsUnicodeProperties.h index 970699f92bbd..1b54893f341c 100644 --- a/intl/unicharutil/util/nsUnicodeProperties.h +++ b/intl/unicharutil/util/nsUnicodeProperties.h @@ -7,14 +7,13 @@ #ifndef NS_UNICODEPROPERTIES_H #define NS_UNICODEPROPERTIES_H +#include "mozilla/intl/UnicodeProperties.h" + #include "nsBidiUtils.h" #include "nsUGenCategory.h" #include "nsUnicodeScriptCodes.h" #include "harfbuzz/hb.h" -#include "unicode/uchar.h" -#include "unicode/uscript.h" - const nsCharProps2& GetCharProps2(uint32_t aCh); namespace mozilla { @@ -57,32 +56,17 @@ const uint32_t kEmojiSkinToneLast = 0x1f3ff; extern const hb_unicode_general_category_t sICUtoHBcategory[]; -inline uint32_t GetMirroredChar(uint32_t aCh) { return u_charMirror(aCh); } - -inline bool HasMirroredChar(uint32_t aCh) { return u_isMirrored(aCh); } - -inline uint8_t GetCombiningClass(uint32_t aCh) { - return u_getCombiningClass(aCh); -} - inline uint8_t GetGeneralCategory(uint32_t aCh) { - return sICUtoHBcategory[u_charType(aCh)]; + return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)]; } inline nsCharType GetBidiCat(uint32_t aCh) { return nsCharType(u_charDirection(aCh)); } -inline int8_t GetNumericValue(uint32_t aCh) { - UNumericType type = - UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE)); - return type == U_NT_DECIMAL || type == U_NT_DIGIT - ? int8_t(u_getNumericValue(aCh)) - : -1; -} - inline uint8_t GetLineBreakClass(uint32_t aCh) { - return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK); + return intl::UnicodeProperties::GetIntPropertyValue( + aCh, intl::UnicodeProperties::IntProperty::LineBreak); } inline Script GetScriptCode(uint32_t aCh) { @@ -104,28 +88,22 @@ inline uint32_t GetScriptTagForCode(Script aScriptCode) { } inline PairedBracketType GetPairedBracketType(uint32_t aCh) { - return PairedBracketType( - u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE)); + return PairedBracketType(intl::UnicodeProperties::GetIntPropertyValue( + aCh, intl::UnicodeProperties::IntProperty::BidiPairedBracketType)); } -inline uint32_t GetPairedBracket(uint32_t aCh) { - return u_getBidiPairedBracket(aCh); -} - -inline uint32_t GetUppercase(uint32_t aCh) { return u_toupper(aCh); } - -inline uint32_t GetLowercase(uint32_t aCh) { return u_tolower(aCh); } - inline uint32_t GetTitlecaseForLower( uint32_t aCh) // maps LC to titlecase, UC unchanged { - return u_isULowercase(aCh) ? u_totitle(aCh) : aCh; + return intl::UnicodeProperties::IsLowercase(aCh) + ? intl::UnicodeProperties::ToTitle(aCh) + : aCh; } inline uint32_t GetTitlecaseForAll( uint32_t aCh) // maps both UC and LC to titlecase { - return u_totitle(aCh); + return intl::UnicodeProperties::ToTitle(aCh); } inline uint32_t GetFoldedcase(uint32_t aCh) { @@ -135,62 +113,22 @@ inline uint32_t GetFoldedcase(uint32_t aCh) { if (aCh == 0x0130 || aCh == 0x0131) { return 'i'; } - return u_foldCase(aCh, U_FOLD_CASE_DEFAULT); -} - -inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) { - switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) { - case U_EA_FULLWIDTH: - case U_EA_HALFWIDTH: - return true; - case U_EA_WIDE: - return u_hasBinaryProperty(aCh, UCHAR_EMOJI) ? false : true; - case U_EA_AMBIGUOUS: - case U_EA_NARROW: - case U_EA_NEUTRAL: - return false; - } - return false; -} - -inline bool IsEastAsianWidthAFW(uint32_t aCh) { - switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) { - case U_EA_AMBIGUOUS: - case U_EA_FULLWIDTH: - case U_EA_WIDE: - return true; - case U_EA_HALFWIDTH: - case U_EA_NARROW: - case U_EA_NEUTRAL: - return false; - } - return false; -} - -inline bool IsEastAsianWidthFW(uint32_t aCh) { - switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) { - case U_EA_FULLWIDTH: - case U_EA_WIDE: - return true; - case U_EA_AMBIGUOUS: - case U_EA_HALFWIDTH: - case U_EA_NARROW: - case U_EA_NEUTRAL: - return false; - } - return false; + return intl::UnicodeProperties::FoldCase(aCh); } inline bool IsDefaultIgnorable(uint32_t aCh) { - return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT); + return intl::UnicodeProperties::HasBinaryProperty( + aCh, intl::UnicodeProperties::BinaryProperty::DefaultIgnorableCodePoint); } inline EmojiPresentation GetEmojiPresentation(uint32_t aCh) { - if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) { + if (!intl::UnicodeProperties::HasBinaryProperty( + aCh, intl::UnicodeProperties::BinaryProperty::Emoji)) { return TextOnly; } - if (u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)) { + if (intl::UnicodeProperties::HasBinaryProperty( + aCh, intl::UnicodeProperties::BinaryProperty::EmojiPresentation)) { return EmojiDefault; } return TextDefault; @@ -271,11 +209,6 @@ uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength); // to the values we care about at runtime. bool IsCombiningDiacritic(uint32_t aCh); -// Keep this function in sync with is_math_symbol in base_chars.py. -inline bool IsMathOrMusicSymbol(uint32_t aCh) { - return u_charType(aCh) == U_MATH_SYMBOL || u_charType(aCh) == U_OTHER_SYMBOL; -} - // Remove diacritics from a character uint32_t GetNaked(uint32_t aCh); diff --git a/netwerk/dns/nsIDNService.cpp b/netwerk/dns/nsIDNService.cpp index c0ffa6f9aca9..cc836c617954 100644 --- a/netwerk/dns/nsIDNService.cpp +++ b/netwerk/dns/nsIDNService.cpp @@ -19,6 +19,7 @@ #include "mozilla/TextUtils.h" #include "mozilla/Utf8.h" #include "mozilla/intl/Script.h" +#include "mozilla/intl/UnicodeProperties.h" // Currently we use the non-transitional processing option -- see // http://unicode.org/reports/tr46/ @@ -774,7 +775,8 @@ bool nsIDNService::isLabelSafe(const nsAString& label) { // Check for mixed numbering systems auto genCat = GetGeneralCategory(ch); if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) { - uint32_t zeroCharacter = ch - GetNumericValue(ch); + uint32_t zeroCharacter = + ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch); if (savedNumberingSystem == 0) { // If we encounter a decimal number, save the zero character from that // numbering system. diff --git a/toolkit/components/find/nsFind.cpp b/toolkit/components/find/nsFind.cpp index fe18c0987562..4dc90a6fce11 100644 --- a/toolkit/components/find/nsFind.cpp +++ b/toolkit/components/find/nsFind.cpp @@ -31,6 +31,7 @@ #include "mozilla/dom/HTMLOptionElement.h" #include "mozilla/dom/HTMLSelectElement.h" #include "mozilla/dom/Text.h" +#include "mozilla/intl/UnicodeProperties.h" #include "mozilla/intl/WordBreaker.h" #include "mozilla/StaticPrefs_browser.h" @@ -775,7 +776,7 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange, // already guaranteed to not be a combining diacritical mark.) c = (t2b ? DecodeChar(t2b, &findex) : CHAR_TO_UNICHAR(t1b[findex])); if (!mMatchDiacritics && IsCombiningDiacritic(c) && - !IsMathOrMusicSymbol(prevChar)) { + !intl::UnicodeProperties::IsMathOrMusicSymbol(prevChar)) { continue; } patc = DecodeChar(patStr, &pindex); diff --git a/toolkit/components/places/tests/gtest/test_casing.cpp b/toolkit/components/places/tests/gtest/test_casing.cpp index 1668f2f5a6c3..079d64bbd070 100644 --- a/toolkit/components/places/tests/gtest/test_casing.cpp +++ b/toolkit/components/places/tests/gtest/test_casing.cpp @@ -5,7 +5,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "gtest/gtest.h" -#include "nsUnicodeProperties.h" +#include "mozilla/intl/UnicodeProperties.h" // Verify the assertion in SQLFunctions.cpp / nextSearchCandidate that the // only non-ASCII characters that lower-case to ASCII ones are: @@ -15,7 +15,7 @@ TEST(MatchAutocompleteCasing, CaseAssumption) { for (uint32_t c = 128; c < 0x110000; c++) { if (c != 304 && c != 8490) { - ASSERT_GE(mozilla::unicode::GetLowercase(c), 128U); + ASSERT_GE(mozilla::intl::UnicodeProperties::ToLower(c), 128U); } } } @@ -24,6 +24,6 @@ TEST(MatchAutocompleteCasing, CaseAssumption) TEST(MatchAutocompleteCasing, CaseAssumption2) { for (uint32_t c = 0; c < 128; c++) { - ASSERT_LT(mozilla::unicode::GetLowercase(c), 128U); + ASSERT_LT(mozilla::intl::UnicodeProperties::ToLower(c), 128U); } }