Bug 1719554 - Unify most of nsUnicodeProperties.h; r=platform-i18n-reviewers,jfkthame,gregtatum,necko-reviewers,valentin

This unifies most of the calls in nsUnicodeProperties.h. CharType and Script
will be handled in subsequent patches on this bug.

Differential Revision: https://phabricator.services.mozilla.com/D132273
This commit is contained in:
Dan Minor 2021-12-06 18:15:49 +00:00
Родитель 517f887537
Коммит e12c3387e8
13 изменённых файлов: 286 добавлений и 114 удалений

Просмотреть файл

@ -215,6 +215,7 @@
#include "mozilla/dom/Element.h"
#include "mozilla/dom/HTMLSlotElement.h"
#include "mozilla/dom/ShadowRoot.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "nsTextFragment.h"
#include "nsAttrValue.h"

Просмотреть файл

@ -22,7 +22,6 @@
#include "nsContentUtils.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeProperties.h"
#include "nsCRT.h"
#include "mozilla/Casting.h"
#include "mozilla/EditorUtils.h"
@ -31,6 +30,8 @@
#include "mozilla/dom/HTMLBRElement.h"
#include "mozilla/dom/Text.h"
#include "mozilla/intl/Segmenter.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "mozilla/Span.h"
#include "mozilla/Preferences.h"
#include "mozilla/StaticPrefs_converter.h"
@ -1803,7 +1804,7 @@ int32_t GetUnicharWidth(char32_t aCh) {
return 1;
}
return unicode::IsEastAsianWidthFW(aCh) ? 2 : 1;
return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1;
}
int32_t GetUnicharStringWidth(Span<const char16_t> aString) {

Просмотреть файл

@ -11,6 +11,7 @@
#include "gfxTextRun.h"
#include "mozilla/Sprintf.h"
#include "mozilla/intl/String.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "nsUnicodeScriptCodes.h"
@ -981,7 +982,7 @@ static hb_position_t HBGetHKerning(hb_font_t* font, void* font_data,
static hb_codepoint_t HBGetMirroring(hb_unicode_funcs_t* ufuncs,
hb_codepoint_t aCh, void* user_data) {
return GetMirroredChar(aCh);
return intl::UnicodeProperties::CharMirror(aCh);
}
static hb_unicode_general_category_t HBGetGeneralCategory(
@ -996,13 +997,14 @@ static hb_script_t HBGetScript(hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh,
static hb_unicode_combining_class_t HBGetCombiningClass(
hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) {
return hb_unicode_combining_class_t(GetCombiningClass(aCh));
return hb_unicode_combining_class_t(
intl::UnicodeProperties::GetCombiningClass(aCh));
}
static hb_bool_t HBUnicodeCompose(hb_unicode_funcs_t* ufuncs, hb_codepoint_t a,
hb_codepoint_t b, hb_codepoint_t* ab,
void* user_data) {
char32_t ch = mozilla::intl::String::ComposePairNFC(a, b);
char32_t ch = intl::String::ComposePairNFC(a, b);
if (ch > 0) {
*ab = ch;
return true;
@ -1025,7 +1027,7 @@ static hb_bool_t HBUnicodeDecompose(hb_unicode_funcs_t* ufuncs,
#endif
char32_t decomp[2] = {0};
if (mozilla::intl::String::DecomposeRawNFD(ab, decomp)) {
if (intl::String::DecomposeRawNFD(ab, decomp)) {
if (decomp[1] || decomp[0] != ab) {
*a = decomp[0];
*b = decomp[1];

Просмотреть файл

@ -49,6 +49,7 @@
#include "gfxScriptItemizer.h"
#include "mozilla/intl/Script.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "nsCharTraits.h"
#include "harfbuzz/hb.h"
@ -177,12 +178,12 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
*/
gc = GetGeneralCategory(ch);
if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
uint32_t endPairChar = mozilla::intl::UnicodeProperties::CharMirror(ch);
if (endPairChar != ch) {
push(endPairChar, scriptCode);
}
} else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
HasMirroredChar(ch)) {
mozilla::intl::UnicodeProperties::IsMirrored(ch)) {
while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
pop();
}
@ -220,7 +221,7 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
* pop the matching open character from the stack
*/
if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
HasMirroredChar(ch)) {
mozilla::intl::UnicodeProperties::IsMirrored(ch)) {
pop();
}
} else {

Просмотреть файл

@ -30,6 +30,7 @@ EXPORTS.mozilla.intl = [
"src/Script.h",
"src/String.h",
"src/TimeZone.h",
"src/UnicodeProperties.h",
]
UNIFIED_SOURCES += [

Просмотреть файл

@ -0,0 +1,219 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_UnicodeProperties_h_
#define intl_components_UnicodeProperties_h_
#include "unicode/uchar.h"
#include "unicode/uscript.h"
namespace mozilla::intl {
/**
* This component is a Mozilla-focused API for working with text properties.
*/
class UnicodeProperties final {
public:
/**
* Maps the specified character to a "mirror-image" character.
*/
static inline uint32_t CharMirror(uint32_t aCh) { return u_charMirror(aCh); }
/**
* Return the general category value for the code point.
*/
static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); }
/**
* Determine whether the code point has the Bidi_Mirrored property.
*/
static inline bool IsMirrored(uint32_t aCh) { return u_isMirrored(aCh); }
/**
* Returns the combining class of the code point as specified in
* UnicodeData.txt.
*/
static inline uint8_t GetCombiningClass(uint32_t aCh) {
return u_getCombiningClass(aCh);
}
enum class IntProperty {
BidiPairedBracketType,
EastAsianWidth,
HangulSyllableType,
LineBreak,
NumericType,
};
/**
* Get the property value for an enumerated or integer Unicode property for a
* code point.
*/
static inline int32_t GetIntPropertyValue(uint32_t aCh, IntProperty aProp) {
UProperty prop;
switch (aProp) {
case IntProperty::BidiPairedBracketType:
prop = UCHAR_BIDI_PAIRED_BRACKET_TYPE;
break;
case IntProperty::EastAsianWidth:
prop = UCHAR_EAST_ASIAN_WIDTH;
break;
case IntProperty::HangulSyllableType:
prop = UCHAR_HANGUL_SYLLABLE_TYPE;
break;
case IntProperty::LineBreak:
prop = UCHAR_LINE_BREAK;
break;
case IntProperty::NumericType:
prop = UCHAR_NUMERIC_TYPE;
break;
}
return u_getIntPropertyValue(aCh, prop);
}
/**
* Get the numeric value for a Unicode code point as defined in the
* Unicode Character Database if the input is decimal or a digit,
* otherwise, returns -1.
*/
static inline int8_t GetNumericValue(uint32_t aCh) {
UNumericType type =
UNumericType(GetIntPropertyValue(aCh, IntProperty::NumericType));
return type == U_NT_DECIMAL || type == U_NT_DIGIT
? int8_t(u_getNumericValue(aCh))
: -1;
}
/**
* Maps the specified character to its paired bracket character.
*/
static inline uint32_t GetBidiPairedBracket(uint32_t aCh) {
return u_getBidiPairedBracket(aCh);
}
/**
* The given character is mapped to its uppercase equivalent according to
* UnicodeData.txt; if the character has no uppercase equivalent, the
* character itself is returned.
*/
static inline uint32_t ToUpper(uint32_t aCh) { return u_toupper(aCh); }
/**
* The given character is mapped to its lowercase equivalent according to
* UnicodeData.txt; if the character has no lowercase equivalent, the
* character itself is returned.
*/
static inline uint32_t ToLower(uint32_t aCh) { return u_tolower(aCh); }
/**
* Check if a code point has the Lowercase Unicode property.
*/
static inline bool IsLowercase(uint32_t aCh) { return u_isULowercase(aCh); }
/**
* The given character is mapped to its titlecase equivalent according to
* UnicodeData.txt; if the character has no titlecase equivalent, the
* character itself is returned.
*/
static inline uint32_t ToTitle(uint32_t aCh) { return u_totitle(aCh); }
/**
* The given character is mapped to its case folding equivalent according to
* UnicodeData.txt and CaseFolding.txt;
* if the character has no case folding equivalent, the character
* itself is returned.
*/
static inline uint32_t FoldCase(uint32_t aCh) {
return u_foldCase(aCh, U_FOLD_CASE_DEFAULT);
}
enum class BinaryProperty {
DefaultIgnorableCodePoint,
Emoji,
EmojiPresentation,
};
/**
* Check a binary Unicode property for a code point.
*/
static inline bool HasBinaryProperty(uint32_t aCh, BinaryProperty aProp) {
UProperty prop;
switch (aProp) {
case BinaryProperty::DefaultIgnorableCodePoint:
prop = UCHAR_DEFAULT_IGNORABLE_CODE_POINT;
break;
case BinaryProperty::Emoji:
prop = UCHAR_EMOJI;
break;
case BinaryProperty::EmojiPresentation:
prop = UCHAR_EMOJI_PRESENTATION;
break;
}
return u_hasBinaryProperty(aCh, prop);
}
/**
* Check if the width of aCh is full width, half width or wide
* excluding emoji.
*/
static inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) {
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
case U_EA_FULLWIDTH:
case U_EA_HALFWIDTH:
return true;
case U_EA_WIDE:
return HasBinaryProperty(aCh, BinaryProperty::Emoji) ? false : true;
case U_EA_AMBIGUOUS:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
}
/**
* Check if the width of aCh is ambiguous, full width, or wide.
*/
static inline bool IsEastAsianWidthAFW(uint32_t aCh) {
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
case U_EA_AMBIGUOUS:
case U_EA_FULLWIDTH:
case U_EA_WIDE:
return true;
case U_EA_HALFWIDTH:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
}
/**
* Check if the width of aCh is full width, or wide.
*/
static inline bool IsEastAsianWidthFW(uint32_t aCh) {
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
case U_EA_FULLWIDTH:
case U_EA_WIDE:
return true;
case U_EA_AMBIGUOUS:
case U_EA_HALFWIDTH:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
}
/**
* Check if the CharType of aCh is math or other symbol.
*/
static inline bool IsMathOrMusicSymbol(uint32_t aCh) {
// Keep this function in sync with is_math_symbol in base_chars.py.
return CharType(aCh) == U_MATH_SYMBOL || CharType(aCh) == U_OTHER_SYMBOL;
}
};
} // namespace mozilla::intl
#endif

Просмотреть файл

@ -11,6 +11,7 @@
#include "nsUnicodeProperties.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/intl/Segmenter.h"
#include "mozilla/intl/UnicodeProperties.h"
using namespace mozilla::unicode;
using namespace mozilla::intl;
@ -462,10 +463,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (aIsChineseOrJapanese) {
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_OPEN_LIKE_CHARACTER;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
@ -485,10 +488,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (aIsChineseOrJapanese) {
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_OPEN_LIKE_CHARACTER;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
@ -513,10 +518,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
u == 0xFF01 || u == 0xFF1F) {
return CLASS_BREAKABLE;
}
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_BREAKABLE;
}
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_BREAKABLE;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {

Просмотреть файл

@ -8,6 +8,7 @@
#include "nsUnicodeProperties.h"
#include "mozilla/Likely.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/intl/UnicodeProperties.h"
// We map x -> x, except for upper-case letters,
// which we map to their lower-case equivalents.
@ -33,7 +34,7 @@ static MOZ_ALWAYS_INLINE uint32_t ToLowerCase_inline(uint32_t aChar) {
return gASCIIToLower[aChar];
}
return mozilla::unicode::GetLowercase(aChar);
return mozilla::intl::UnicodeProperties::ToLower(aChar);
}
static MOZ_ALWAYS_INLINE uint32_t
@ -244,7 +245,8 @@ void ToLowerCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) {
for (uint32_t i = 0; i < aLen; i++) {
uint32_t ch = aIn[i];
if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) {
ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1]));
ch = mozilla::intl::UnicodeProperties::ToLower(
SURROGATE_TO_UCS4(ch, aIn[i + 1]));
NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!");
aOut[i++] = H_SURROGATE(ch);
aOut[i] = L_SURROGATE(ch);
@ -269,14 +271,15 @@ uint32_t ToUpperCase(uint32_t aChar) {
return aChar;
}
return mozilla::unicode::GetUppercase(aChar);
return mozilla::intl::UnicodeProperties::ToUpper(aChar);
}
void ToUpperCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) {
for (uint32_t i = 0; i < aLen; i++) {
uint32_t ch = aIn[i];
if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) {
ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1]));
ch = mozilla::intl::UnicodeProperties::ToUpper(
SURROGATE_TO_UCS4(ch, aIn[i + 1]));
NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!");
aOut[i++] = H_SURROGATE(ch);
aOut[i] = L_SURROGATE(ch);
@ -362,7 +365,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
// we don't go through ToLowerCase here, because we know this isn't
// an ASCII character so the ASCII fast-path there is useless
c = mozilla::unicode::GetLowercase(c);
c = mozilla::intl::UnicodeProperties::ToLower(c);
*aNext = aStr + 2;
return c;
@ -377,7 +380,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
c += (str[1] & 0x3F) << 6;
c += (str[2] & 0x3F);
c = mozilla::unicode::GetLowercase(c);
c = mozilla::intl::UnicodeProperties::ToLower(c);
*aNext = aStr + 3;
return c;
@ -392,7 +395,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
c += (str[2] & 0x3F) << 6;
c += (str[3] & 0x3F);
c = mozilla::unicode::GetLowercase(c);
c = mozilla::intl::UnicodeProperties::ToLower(c);
*aNext = aStr + 4;
return c;
@ -514,7 +517,7 @@ uint32_t HashUTF8AsUTF16(const char* aUTF8, uint32_t aLength, bool* aErr) {
}
bool IsSegmentBreakSkipChar(uint32_t u) {
return unicode::IsEastAsianWidthFHWexcludingEmoji(u) &&
return intl::UnicodeProperties::IsEastAsianWidthFHWexcludingEmoji(u) &&
unicode::GetScriptCode(u) != unicode::Script::HANGUL;
}

Просмотреть файл

@ -177,7 +177,8 @@ enum HSType {
};
static HSType GetHangulSyllableType(uint32_t aCh) {
return HSType(u_getIntPropertyValue(aCh, UCHAR_HANGUL_SYLLABLE_TYPE));
return HSType(intl::UnicodeProperties::GetIntPropertyValue(
aCh, intl::UnicodeProperties::IntProperty::HangulSyllableType));
}
void ClusterIterator::Next() {

Просмотреть файл

@ -7,14 +7,13 @@
#ifndef NS_UNICODEPROPERTIES_H
#define NS_UNICODEPROPERTIES_H
#include "mozilla/intl/UnicodeProperties.h"
#include "nsBidiUtils.h"
#include "nsUGenCategory.h"
#include "nsUnicodeScriptCodes.h"
#include "harfbuzz/hb.h"
#include "unicode/uchar.h"
#include "unicode/uscript.h"
const nsCharProps2& GetCharProps2(uint32_t aCh);
namespace mozilla {
@ -57,32 +56,17 @@ const uint32_t kEmojiSkinToneLast = 0x1f3ff;
extern const hb_unicode_general_category_t sICUtoHBcategory[];
inline uint32_t GetMirroredChar(uint32_t aCh) { return u_charMirror(aCh); }
inline bool HasMirroredChar(uint32_t aCh) { return u_isMirrored(aCh); }
inline uint8_t GetCombiningClass(uint32_t aCh) {
return u_getCombiningClass(aCh);
}
inline uint8_t GetGeneralCategory(uint32_t aCh) {
return sICUtoHBcategory[u_charType(aCh)];
return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)];
}
inline nsCharType GetBidiCat(uint32_t aCh) {
return nsCharType(u_charDirection(aCh));
}
inline int8_t GetNumericValue(uint32_t aCh) {
UNumericType type =
UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
return type == U_NT_DECIMAL || type == U_NT_DIGIT
? int8_t(u_getNumericValue(aCh))
: -1;
}
inline uint8_t GetLineBreakClass(uint32_t aCh) {
return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
return intl::UnicodeProperties::GetIntPropertyValue(
aCh, intl::UnicodeProperties::IntProperty::LineBreak);
}
inline Script GetScriptCode(uint32_t aCh) {
@ -104,28 +88,22 @@ inline uint32_t GetScriptTagForCode(Script aScriptCode) {
}
inline PairedBracketType GetPairedBracketType(uint32_t aCh) {
return PairedBracketType(
u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
return PairedBracketType(intl::UnicodeProperties::GetIntPropertyValue(
aCh, intl::UnicodeProperties::IntProperty::BidiPairedBracketType));
}
inline uint32_t GetPairedBracket(uint32_t aCh) {
return u_getBidiPairedBracket(aCh);
}
inline uint32_t GetUppercase(uint32_t aCh) { return u_toupper(aCh); }
inline uint32_t GetLowercase(uint32_t aCh) { return u_tolower(aCh); }
inline uint32_t GetTitlecaseForLower(
uint32_t aCh) // maps LC to titlecase, UC unchanged
{
return u_isULowercase(aCh) ? u_totitle(aCh) : aCh;
return intl::UnicodeProperties::IsLowercase(aCh)
? intl::UnicodeProperties::ToTitle(aCh)
: aCh;
}
inline uint32_t GetTitlecaseForAll(
uint32_t aCh) // maps both UC and LC to titlecase
{
return u_totitle(aCh);
return intl::UnicodeProperties::ToTitle(aCh);
}
inline uint32_t GetFoldedcase(uint32_t aCh) {
@ -135,62 +113,22 @@ inline uint32_t GetFoldedcase(uint32_t aCh) {
if (aCh == 0x0130 || aCh == 0x0131) {
return 'i';
}
return u_foldCase(aCh, U_FOLD_CASE_DEFAULT);
}
inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) {
switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
case U_EA_FULLWIDTH:
case U_EA_HALFWIDTH:
return true;
case U_EA_WIDE:
return u_hasBinaryProperty(aCh, UCHAR_EMOJI) ? false : true;
case U_EA_AMBIGUOUS:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
}
inline bool IsEastAsianWidthAFW(uint32_t aCh) {
switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
case U_EA_AMBIGUOUS:
case U_EA_FULLWIDTH:
case U_EA_WIDE:
return true;
case U_EA_HALFWIDTH:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
}
inline bool IsEastAsianWidthFW(uint32_t aCh) {
switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
case U_EA_FULLWIDTH:
case U_EA_WIDE:
return true;
case U_EA_AMBIGUOUS:
case U_EA_HALFWIDTH:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
return intl::UnicodeProperties::FoldCase(aCh);
}
inline bool IsDefaultIgnorable(uint32_t aCh) {
return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT);
return intl::UnicodeProperties::HasBinaryProperty(
aCh, intl::UnicodeProperties::BinaryProperty::DefaultIgnorableCodePoint);
}
inline EmojiPresentation GetEmojiPresentation(uint32_t aCh) {
if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) {
if (!intl::UnicodeProperties::HasBinaryProperty(
aCh, intl::UnicodeProperties::BinaryProperty::Emoji)) {
return TextOnly;
}
if (u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)) {
if (intl::UnicodeProperties::HasBinaryProperty(
aCh, intl::UnicodeProperties::BinaryProperty::EmojiPresentation)) {
return EmojiDefault;
}
return TextDefault;
@ -271,11 +209,6 @@ uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength);
// to the values we care about at runtime.
bool IsCombiningDiacritic(uint32_t aCh);
// Keep this function in sync with is_math_symbol in base_chars.py.
inline bool IsMathOrMusicSymbol(uint32_t aCh) {
return u_charType(aCh) == U_MATH_SYMBOL || u_charType(aCh) == U_OTHER_SYMBOL;
}
// Remove diacritics from a character
uint32_t GetNaked(uint32_t aCh);

Просмотреть файл

@ -19,6 +19,7 @@
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include "mozilla/intl/Script.h"
#include "mozilla/intl/UnicodeProperties.h"
// Currently we use the non-transitional processing option -- see
// http://unicode.org/reports/tr46/
@ -774,7 +775,8 @@ bool nsIDNService::isLabelSafe(const nsAString& label) {
// Check for mixed numbering systems
auto genCat = GetGeneralCategory(ch);
if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
uint32_t zeroCharacter = ch - GetNumericValue(ch);
uint32_t zeroCharacter =
ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
if (savedNumberingSystem == 0) {
// If we encounter a decimal number, save the zero character from that
// numbering system.

Просмотреть файл

@ -31,6 +31,7 @@
#include "mozilla/dom/HTMLOptionElement.h"
#include "mozilla/dom/HTMLSelectElement.h"
#include "mozilla/dom/Text.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/intl/WordBreaker.h"
#include "mozilla/StaticPrefs_browser.h"
@ -775,7 +776,7 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
// already guaranteed to not be a combining diacritical mark.)
c = (t2b ? DecodeChar(t2b, &findex) : CHAR_TO_UNICHAR(t1b[findex]));
if (!mMatchDiacritics && IsCombiningDiacritic(c) &&
!IsMathOrMusicSymbol(prevChar)) {
!intl::UnicodeProperties::IsMathOrMusicSymbol(prevChar)) {
continue;
}
patc = DecodeChar(patStr, &pindex);

Просмотреть файл

@ -5,7 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include "nsUnicodeProperties.h"
#include "mozilla/intl/UnicodeProperties.h"
// Verify the assertion in SQLFunctions.cpp / nextSearchCandidate that the
// only non-ASCII characters that lower-case to ASCII ones are:
@ -15,7 +15,7 @@ TEST(MatchAutocompleteCasing, CaseAssumption)
{
for (uint32_t c = 128; c < 0x110000; c++) {
if (c != 304 && c != 8490) {
ASSERT_GE(mozilla::unicode::GetLowercase(c), 128U);
ASSERT_GE(mozilla::intl::UnicodeProperties::ToLower(c), 128U);
}
}
}
@ -24,6 +24,6 @@ TEST(MatchAutocompleteCasing, CaseAssumption)
TEST(MatchAutocompleteCasing, CaseAssumption2)
{
for (uint32_t c = 0; c < 128; c++) {
ASSERT_LT(mozilla::unicode::GetLowercase(c), 128U);
ASSERT_LT(mozilla::intl::UnicodeProperties::ToLower(c), 128U);
}
}