зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1719554 - Unify most of nsUnicodeProperties.h; r=platform-i18n-reviewers,jfkthame,gregtatum,necko-reviewers,valentin
This unifies most of the calls in nsUnicodeProperties.h. CharType and Script will be handled in subsequent patches on this bug. Differential Revision: https://phabricator.services.mozilla.com/D132273
This commit is contained in:
Родитель
517f887537
Коммит
e12c3387e8
|
@ -215,6 +215,7 @@
|
|||
#include "mozilla/dom/Element.h"
|
||||
#include "mozilla/dom/HTMLSlotElement.h"
|
||||
#include "mozilla/dom/ShadowRoot.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
#include "nsUnicodeProperties.h"
|
||||
#include "nsTextFragment.h"
|
||||
#include "nsAttrValue.h"
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "nsContentUtils.h"
|
||||
#include "nsReadableUtils.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
#include "nsUnicodeProperties.h"
|
||||
#include "nsCRT.h"
|
||||
#include "mozilla/Casting.h"
|
||||
#include "mozilla/EditorUtils.h"
|
||||
|
@ -31,6 +30,8 @@
|
|||
#include "mozilla/dom/HTMLBRElement.h"
|
||||
#include "mozilla/dom/Text.h"
|
||||
#include "mozilla/intl/Segmenter.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
#include "nsUnicodeProperties.h"
|
||||
#include "mozilla/Span.h"
|
||||
#include "mozilla/Preferences.h"
|
||||
#include "mozilla/StaticPrefs_converter.h"
|
||||
|
@ -1803,7 +1804,7 @@ int32_t GetUnicharWidth(char32_t aCh) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
return unicode::IsEastAsianWidthFW(aCh) ? 2 : 1;
|
||||
return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1;
|
||||
}
|
||||
|
||||
int32_t GetUnicharStringWidth(Span<const char16_t> aString) {
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "gfxTextRun.h"
|
||||
#include "mozilla/Sprintf.h"
|
||||
#include "mozilla/intl/String.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
#include "nsUnicodeProperties.h"
|
||||
#include "nsUnicodeScriptCodes.h"
|
||||
|
||||
|
@ -981,7 +982,7 @@ static hb_position_t HBGetHKerning(hb_font_t* font, void* font_data,
|
|||
|
||||
static hb_codepoint_t HBGetMirroring(hb_unicode_funcs_t* ufuncs,
|
||||
hb_codepoint_t aCh, void* user_data) {
|
||||
return GetMirroredChar(aCh);
|
||||
return intl::UnicodeProperties::CharMirror(aCh);
|
||||
}
|
||||
|
||||
static hb_unicode_general_category_t HBGetGeneralCategory(
|
||||
|
@ -996,13 +997,14 @@ static hb_script_t HBGetScript(hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh,
|
|||
|
||||
static hb_unicode_combining_class_t HBGetCombiningClass(
|
||||
hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) {
|
||||
return hb_unicode_combining_class_t(GetCombiningClass(aCh));
|
||||
return hb_unicode_combining_class_t(
|
||||
intl::UnicodeProperties::GetCombiningClass(aCh));
|
||||
}
|
||||
|
||||
static hb_bool_t HBUnicodeCompose(hb_unicode_funcs_t* ufuncs, hb_codepoint_t a,
|
||||
hb_codepoint_t b, hb_codepoint_t* ab,
|
||||
void* user_data) {
|
||||
char32_t ch = mozilla::intl::String::ComposePairNFC(a, b);
|
||||
char32_t ch = intl::String::ComposePairNFC(a, b);
|
||||
if (ch > 0) {
|
||||
*ab = ch;
|
||||
return true;
|
||||
|
@ -1025,7 +1027,7 @@ static hb_bool_t HBUnicodeDecompose(hb_unicode_funcs_t* ufuncs,
|
|||
#endif
|
||||
|
||||
char32_t decomp[2] = {0};
|
||||
if (mozilla::intl::String::DecomposeRawNFD(ab, decomp)) {
|
||||
if (intl::String::DecomposeRawNFD(ab, decomp)) {
|
||||
if (decomp[1] || decomp[0] != ab) {
|
||||
*a = decomp[0];
|
||||
*b = decomp[1];
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
|
||||
#include "gfxScriptItemizer.h"
|
||||
#include "mozilla/intl/Script.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
#include "nsUnicodeProperties.h"
|
||||
#include "nsCharTraits.h"
|
||||
#include "harfbuzz/hb.h"
|
||||
|
@ -177,12 +178,12 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
|
|||
*/
|
||||
gc = GetGeneralCategory(ch);
|
||||
if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
|
||||
uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
|
||||
uint32_t endPairChar = mozilla::intl::UnicodeProperties::CharMirror(ch);
|
||||
if (endPairChar != ch) {
|
||||
push(endPairChar, scriptCode);
|
||||
}
|
||||
} else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
|
||||
HasMirroredChar(ch)) {
|
||||
mozilla::intl::UnicodeProperties::IsMirrored(ch)) {
|
||||
while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
|
||||
pop();
|
||||
}
|
||||
|
@ -220,7 +221,7 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
|
|||
* pop the matching open character from the stack
|
||||
*/
|
||||
if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
|
||||
HasMirroredChar(ch)) {
|
||||
mozilla::intl::UnicodeProperties::IsMirrored(ch)) {
|
||||
pop();
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -30,6 +30,7 @@ EXPORTS.mozilla.intl = [
|
|||
"src/Script.h",
|
||||
"src/String.h",
|
||||
"src/TimeZone.h",
|
||||
"src/UnicodeProperties.h",
|
||||
]
|
||||
|
||||
UNIFIED_SOURCES += [
|
||||
|
|
|
@ -0,0 +1,219 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#ifndef intl_components_UnicodeProperties_h_
|
||||
#define intl_components_UnicodeProperties_h_
|
||||
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
namespace mozilla::intl {
|
||||
|
||||
/**
|
||||
* This component is a Mozilla-focused API for working with text properties.
|
||||
*/
|
||||
class UnicodeProperties final {
|
||||
public:
|
||||
/**
|
||||
* Maps the specified character to a "mirror-image" character.
|
||||
*/
|
||||
static inline uint32_t CharMirror(uint32_t aCh) { return u_charMirror(aCh); }
|
||||
|
||||
/**
|
||||
* Return the general category value for the code point.
|
||||
*/
|
||||
static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); }
|
||||
|
||||
/**
|
||||
* Determine whether the code point has the Bidi_Mirrored property.
|
||||
*/
|
||||
static inline bool IsMirrored(uint32_t aCh) { return u_isMirrored(aCh); }
|
||||
|
||||
/**
|
||||
* Returns the combining class of the code point as specified in
|
||||
* UnicodeData.txt.
|
||||
*/
|
||||
static inline uint8_t GetCombiningClass(uint32_t aCh) {
|
||||
return u_getCombiningClass(aCh);
|
||||
}
|
||||
|
||||
enum class IntProperty {
|
||||
BidiPairedBracketType,
|
||||
EastAsianWidth,
|
||||
HangulSyllableType,
|
||||
LineBreak,
|
||||
NumericType,
|
||||
};
|
||||
|
||||
/**
|
||||
* Get the property value for an enumerated or integer Unicode property for a
|
||||
* code point.
|
||||
*/
|
||||
static inline int32_t GetIntPropertyValue(uint32_t aCh, IntProperty aProp) {
|
||||
UProperty prop;
|
||||
switch (aProp) {
|
||||
case IntProperty::BidiPairedBracketType:
|
||||
prop = UCHAR_BIDI_PAIRED_BRACKET_TYPE;
|
||||
break;
|
||||
case IntProperty::EastAsianWidth:
|
||||
prop = UCHAR_EAST_ASIAN_WIDTH;
|
||||
break;
|
||||
case IntProperty::HangulSyllableType:
|
||||
prop = UCHAR_HANGUL_SYLLABLE_TYPE;
|
||||
break;
|
||||
case IntProperty::LineBreak:
|
||||
prop = UCHAR_LINE_BREAK;
|
||||
break;
|
||||
case IntProperty::NumericType:
|
||||
prop = UCHAR_NUMERIC_TYPE;
|
||||
break;
|
||||
}
|
||||
return u_getIntPropertyValue(aCh, prop);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the numeric value for a Unicode code point as defined in the
|
||||
* Unicode Character Database if the input is decimal or a digit,
|
||||
* otherwise, returns -1.
|
||||
*/
|
||||
static inline int8_t GetNumericValue(uint32_t aCh) {
|
||||
UNumericType type =
|
||||
UNumericType(GetIntPropertyValue(aCh, IntProperty::NumericType));
|
||||
return type == U_NT_DECIMAL || type == U_NT_DIGIT
|
||||
? int8_t(u_getNumericValue(aCh))
|
||||
: -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps the specified character to its paired bracket character.
|
||||
*/
|
||||
static inline uint32_t GetBidiPairedBracket(uint32_t aCh) {
|
||||
return u_getBidiPairedBracket(aCh);
|
||||
}
|
||||
|
||||
/**
|
||||
* The given character is mapped to its uppercase equivalent according to
|
||||
* UnicodeData.txt; if the character has no uppercase equivalent, the
|
||||
* character itself is returned.
|
||||
*/
|
||||
static inline uint32_t ToUpper(uint32_t aCh) { return u_toupper(aCh); }
|
||||
|
||||
/**
|
||||
* The given character is mapped to its lowercase equivalent according to
|
||||
* UnicodeData.txt; if the character has no lowercase equivalent, the
|
||||
* character itself is returned.
|
||||
*/
|
||||
static inline uint32_t ToLower(uint32_t aCh) { return u_tolower(aCh); }
|
||||
|
||||
/**
|
||||
* Check if a code point has the Lowercase Unicode property.
|
||||
*/
|
||||
static inline bool IsLowercase(uint32_t aCh) { return u_isULowercase(aCh); }
|
||||
|
||||
/**
|
||||
* The given character is mapped to its titlecase equivalent according to
|
||||
* UnicodeData.txt; if the character has no titlecase equivalent, the
|
||||
* character itself is returned.
|
||||
*/
|
||||
static inline uint32_t ToTitle(uint32_t aCh) { return u_totitle(aCh); }
|
||||
|
||||
/**
|
||||
* The given character is mapped to its case folding equivalent according to
|
||||
* UnicodeData.txt and CaseFolding.txt;
|
||||
* if the character has no case folding equivalent, the character
|
||||
* itself is returned.
|
||||
*/
|
||||
static inline uint32_t FoldCase(uint32_t aCh) {
|
||||
return u_foldCase(aCh, U_FOLD_CASE_DEFAULT);
|
||||
}
|
||||
|
||||
enum class BinaryProperty {
|
||||
DefaultIgnorableCodePoint,
|
||||
Emoji,
|
||||
EmojiPresentation,
|
||||
};
|
||||
|
||||
/**
|
||||
* Check a binary Unicode property for a code point.
|
||||
*/
|
||||
static inline bool HasBinaryProperty(uint32_t aCh, BinaryProperty aProp) {
|
||||
UProperty prop;
|
||||
switch (aProp) {
|
||||
case BinaryProperty::DefaultIgnorableCodePoint:
|
||||
prop = UCHAR_DEFAULT_IGNORABLE_CODE_POINT;
|
||||
break;
|
||||
case BinaryProperty::Emoji:
|
||||
prop = UCHAR_EMOJI;
|
||||
break;
|
||||
case BinaryProperty::EmojiPresentation:
|
||||
prop = UCHAR_EMOJI_PRESENTATION;
|
||||
break;
|
||||
}
|
||||
return u_hasBinaryProperty(aCh, prop);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the width of aCh is full width, half width or wide
|
||||
* excluding emoji.
|
||||
*/
|
||||
static inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) {
|
||||
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
|
||||
case U_EA_FULLWIDTH:
|
||||
case U_EA_HALFWIDTH:
|
||||
return true;
|
||||
case U_EA_WIDE:
|
||||
return HasBinaryProperty(aCh, BinaryProperty::Emoji) ? false : true;
|
||||
case U_EA_AMBIGUOUS:
|
||||
case U_EA_NARROW:
|
||||
case U_EA_NEUTRAL:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the width of aCh is ambiguous, full width, or wide.
|
||||
*/
|
||||
static inline bool IsEastAsianWidthAFW(uint32_t aCh) {
|
||||
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
|
||||
case U_EA_AMBIGUOUS:
|
||||
case U_EA_FULLWIDTH:
|
||||
case U_EA_WIDE:
|
||||
return true;
|
||||
case U_EA_HALFWIDTH:
|
||||
case U_EA_NARROW:
|
||||
case U_EA_NEUTRAL:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the width of aCh is full width, or wide.
|
||||
*/
|
||||
static inline bool IsEastAsianWidthFW(uint32_t aCh) {
|
||||
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
|
||||
case U_EA_FULLWIDTH:
|
||||
case U_EA_WIDE:
|
||||
return true;
|
||||
case U_EA_AMBIGUOUS:
|
||||
case U_EA_HALFWIDTH:
|
||||
case U_EA_NARROW:
|
||||
case U_EA_NEUTRAL:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the CharType of aCh is math or other symbol.
|
||||
*/
|
||||
static inline bool IsMathOrMusicSymbol(uint32_t aCh) {
|
||||
// Keep this function in sync with is_math_symbol in base_chars.py.
|
||||
return CharType(aCh) == U_MATH_SYMBOL || CharType(aCh) == U_OTHER_SYMBOL;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace mozilla::intl
|
||||
|
||||
#endif
|
|
@ -11,6 +11,7 @@
|
|||
#include "nsUnicodeProperties.h"
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
#include "mozilla/intl/Segmenter.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
|
||||
using namespace mozilla::unicode;
|
||||
using namespace mozilla::intl;
|
||||
|
@ -462,10 +463,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
|
|||
return CLASS_CLOSE_LIKE_CHARACTER;
|
||||
}
|
||||
if (aIsChineseOrJapanese) {
|
||||
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
|
||||
if (cls == U_LB_POSTFIX_NUMERIC &&
|
||||
UnicodeProperties::IsEastAsianWidthAFW(u)) {
|
||||
return CLASS_CLOSE_LIKE_CHARACTER;
|
||||
}
|
||||
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
|
||||
if (cls == U_LB_PREFIX_NUMERIC &&
|
||||
UnicodeProperties::IsEastAsianWidthAFW(u)) {
|
||||
return CLASS_OPEN_LIKE_CHARACTER;
|
||||
}
|
||||
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
|
||||
|
@ -485,10 +488,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
|
|||
return CLASS_CLOSE_LIKE_CHARACTER;
|
||||
}
|
||||
if (aIsChineseOrJapanese) {
|
||||
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
|
||||
if (cls == U_LB_POSTFIX_NUMERIC &&
|
||||
UnicodeProperties::IsEastAsianWidthAFW(u)) {
|
||||
return CLASS_CLOSE_LIKE_CHARACTER;
|
||||
}
|
||||
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
|
||||
if (cls == U_LB_PREFIX_NUMERIC &&
|
||||
UnicodeProperties::IsEastAsianWidthAFW(u)) {
|
||||
return CLASS_OPEN_LIKE_CHARACTER;
|
||||
}
|
||||
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
|
||||
|
@ -513,10 +518,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
|
|||
u == 0xFF01 || u == 0xFF1F) {
|
||||
return CLASS_BREAKABLE;
|
||||
}
|
||||
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
|
||||
if (cls == U_LB_POSTFIX_NUMERIC &&
|
||||
UnicodeProperties::IsEastAsianWidthAFW(u)) {
|
||||
return CLASS_BREAKABLE;
|
||||
}
|
||||
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
|
||||
if (cls == U_LB_PREFIX_NUMERIC &&
|
||||
UnicodeProperties::IsEastAsianWidthAFW(u)) {
|
||||
return CLASS_BREAKABLE;
|
||||
}
|
||||
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "nsUnicodeProperties.h"
|
||||
#include "mozilla/Likely.h"
|
||||
#include "mozilla/HashFunctions.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
|
||||
// We map x -> x, except for upper-case letters,
|
||||
// which we map to their lower-case equivalents.
|
||||
|
@ -33,7 +34,7 @@ static MOZ_ALWAYS_INLINE uint32_t ToLowerCase_inline(uint32_t aChar) {
|
|||
return gASCIIToLower[aChar];
|
||||
}
|
||||
|
||||
return mozilla::unicode::GetLowercase(aChar);
|
||||
return mozilla::intl::UnicodeProperties::ToLower(aChar);
|
||||
}
|
||||
|
||||
static MOZ_ALWAYS_INLINE uint32_t
|
||||
|
@ -244,7 +245,8 @@ void ToLowerCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) {
|
|||
for (uint32_t i = 0; i < aLen; i++) {
|
||||
uint32_t ch = aIn[i];
|
||||
if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) {
|
||||
ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1]));
|
||||
ch = mozilla::intl::UnicodeProperties::ToLower(
|
||||
SURROGATE_TO_UCS4(ch, aIn[i + 1]));
|
||||
NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!");
|
||||
aOut[i++] = H_SURROGATE(ch);
|
||||
aOut[i] = L_SURROGATE(ch);
|
||||
|
@ -269,14 +271,15 @@ uint32_t ToUpperCase(uint32_t aChar) {
|
|||
return aChar;
|
||||
}
|
||||
|
||||
return mozilla::unicode::GetUppercase(aChar);
|
||||
return mozilla::intl::UnicodeProperties::ToUpper(aChar);
|
||||
}
|
||||
|
||||
void ToUpperCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) {
|
||||
for (uint32_t i = 0; i < aLen; i++) {
|
||||
uint32_t ch = aIn[i];
|
||||
if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) {
|
||||
ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1]));
|
||||
ch = mozilla::intl::UnicodeProperties::ToUpper(
|
||||
SURROGATE_TO_UCS4(ch, aIn[i + 1]));
|
||||
NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!");
|
||||
aOut[i++] = H_SURROGATE(ch);
|
||||
aOut[i] = L_SURROGATE(ch);
|
||||
|
@ -362,7 +365,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
|
|||
|
||||
// we don't go through ToLowerCase here, because we know this isn't
|
||||
// an ASCII character so the ASCII fast-path there is useless
|
||||
c = mozilla::unicode::GetLowercase(c);
|
||||
c = mozilla::intl::UnicodeProperties::ToLower(c);
|
||||
|
||||
*aNext = aStr + 2;
|
||||
return c;
|
||||
|
@ -377,7 +380,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
|
|||
c += (str[1] & 0x3F) << 6;
|
||||
c += (str[2] & 0x3F);
|
||||
|
||||
c = mozilla::unicode::GetLowercase(c);
|
||||
c = mozilla::intl::UnicodeProperties::ToLower(c);
|
||||
|
||||
*aNext = aStr + 3;
|
||||
return c;
|
||||
|
@ -392,7 +395,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
|
|||
c += (str[2] & 0x3F) << 6;
|
||||
c += (str[3] & 0x3F);
|
||||
|
||||
c = mozilla::unicode::GetLowercase(c);
|
||||
c = mozilla::intl::UnicodeProperties::ToLower(c);
|
||||
|
||||
*aNext = aStr + 4;
|
||||
return c;
|
||||
|
@ -514,7 +517,7 @@ uint32_t HashUTF8AsUTF16(const char* aUTF8, uint32_t aLength, bool* aErr) {
|
|||
}
|
||||
|
||||
bool IsSegmentBreakSkipChar(uint32_t u) {
|
||||
return unicode::IsEastAsianWidthFHWexcludingEmoji(u) &&
|
||||
return intl::UnicodeProperties::IsEastAsianWidthFHWexcludingEmoji(u) &&
|
||||
unicode::GetScriptCode(u) != unicode::Script::HANGUL;
|
||||
}
|
||||
|
||||
|
|
|
@ -177,7 +177,8 @@ enum HSType {
|
|||
};
|
||||
|
||||
static HSType GetHangulSyllableType(uint32_t aCh) {
|
||||
return HSType(u_getIntPropertyValue(aCh, UCHAR_HANGUL_SYLLABLE_TYPE));
|
||||
return HSType(intl::UnicodeProperties::GetIntPropertyValue(
|
||||
aCh, intl::UnicodeProperties::IntProperty::HangulSyllableType));
|
||||
}
|
||||
|
||||
void ClusterIterator::Next() {
|
||||
|
|
|
@ -7,14 +7,13 @@
|
|||
#ifndef NS_UNICODEPROPERTIES_H
|
||||
#define NS_UNICODEPROPERTIES_H
|
||||
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
|
||||
#include "nsBidiUtils.h"
|
||||
#include "nsUGenCategory.h"
|
||||
#include "nsUnicodeScriptCodes.h"
|
||||
#include "harfbuzz/hb.h"
|
||||
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
const nsCharProps2& GetCharProps2(uint32_t aCh);
|
||||
|
||||
namespace mozilla {
|
||||
|
@ -57,32 +56,17 @@ const uint32_t kEmojiSkinToneLast = 0x1f3ff;
|
|||
|
||||
extern const hb_unicode_general_category_t sICUtoHBcategory[];
|
||||
|
||||
inline uint32_t GetMirroredChar(uint32_t aCh) { return u_charMirror(aCh); }
|
||||
|
||||
inline bool HasMirroredChar(uint32_t aCh) { return u_isMirrored(aCh); }
|
||||
|
||||
inline uint8_t GetCombiningClass(uint32_t aCh) {
|
||||
return u_getCombiningClass(aCh);
|
||||
}
|
||||
|
||||
inline uint8_t GetGeneralCategory(uint32_t aCh) {
|
||||
return sICUtoHBcategory[u_charType(aCh)];
|
||||
return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)];
|
||||
}
|
||||
|
||||
inline nsCharType GetBidiCat(uint32_t aCh) {
|
||||
return nsCharType(u_charDirection(aCh));
|
||||
}
|
||||
|
||||
inline int8_t GetNumericValue(uint32_t aCh) {
|
||||
UNumericType type =
|
||||
UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
|
||||
return type == U_NT_DECIMAL || type == U_NT_DIGIT
|
||||
? int8_t(u_getNumericValue(aCh))
|
||||
: -1;
|
||||
}
|
||||
|
||||
inline uint8_t GetLineBreakClass(uint32_t aCh) {
|
||||
return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
|
||||
return intl::UnicodeProperties::GetIntPropertyValue(
|
||||
aCh, intl::UnicodeProperties::IntProperty::LineBreak);
|
||||
}
|
||||
|
||||
inline Script GetScriptCode(uint32_t aCh) {
|
||||
|
@ -104,28 +88,22 @@ inline uint32_t GetScriptTagForCode(Script aScriptCode) {
|
|||
}
|
||||
|
||||
inline PairedBracketType GetPairedBracketType(uint32_t aCh) {
|
||||
return PairedBracketType(
|
||||
u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
|
||||
return PairedBracketType(intl::UnicodeProperties::GetIntPropertyValue(
|
||||
aCh, intl::UnicodeProperties::IntProperty::BidiPairedBracketType));
|
||||
}
|
||||
|
||||
inline uint32_t GetPairedBracket(uint32_t aCh) {
|
||||
return u_getBidiPairedBracket(aCh);
|
||||
}
|
||||
|
||||
inline uint32_t GetUppercase(uint32_t aCh) { return u_toupper(aCh); }
|
||||
|
||||
inline uint32_t GetLowercase(uint32_t aCh) { return u_tolower(aCh); }
|
||||
|
||||
inline uint32_t GetTitlecaseForLower(
|
||||
uint32_t aCh) // maps LC to titlecase, UC unchanged
|
||||
{
|
||||
return u_isULowercase(aCh) ? u_totitle(aCh) : aCh;
|
||||
return intl::UnicodeProperties::IsLowercase(aCh)
|
||||
? intl::UnicodeProperties::ToTitle(aCh)
|
||||
: aCh;
|
||||
}
|
||||
|
||||
inline uint32_t GetTitlecaseForAll(
|
||||
uint32_t aCh) // maps both UC and LC to titlecase
|
||||
{
|
||||
return u_totitle(aCh);
|
||||
return intl::UnicodeProperties::ToTitle(aCh);
|
||||
}
|
||||
|
||||
inline uint32_t GetFoldedcase(uint32_t aCh) {
|
||||
|
@ -135,62 +113,22 @@ inline uint32_t GetFoldedcase(uint32_t aCh) {
|
|||
if (aCh == 0x0130 || aCh == 0x0131) {
|
||||
return 'i';
|
||||
}
|
||||
return u_foldCase(aCh, U_FOLD_CASE_DEFAULT);
|
||||
}
|
||||
|
||||
inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) {
|
||||
switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
|
||||
case U_EA_FULLWIDTH:
|
||||
case U_EA_HALFWIDTH:
|
||||
return true;
|
||||
case U_EA_WIDE:
|
||||
return u_hasBinaryProperty(aCh, UCHAR_EMOJI) ? false : true;
|
||||
case U_EA_AMBIGUOUS:
|
||||
case U_EA_NARROW:
|
||||
case U_EA_NEUTRAL:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool IsEastAsianWidthAFW(uint32_t aCh) {
|
||||
switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
|
||||
case U_EA_AMBIGUOUS:
|
||||
case U_EA_FULLWIDTH:
|
||||
case U_EA_WIDE:
|
||||
return true;
|
||||
case U_EA_HALFWIDTH:
|
||||
case U_EA_NARROW:
|
||||
case U_EA_NEUTRAL:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool IsEastAsianWidthFW(uint32_t aCh) {
|
||||
switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
|
||||
case U_EA_FULLWIDTH:
|
||||
case U_EA_WIDE:
|
||||
return true;
|
||||
case U_EA_AMBIGUOUS:
|
||||
case U_EA_HALFWIDTH:
|
||||
case U_EA_NARROW:
|
||||
case U_EA_NEUTRAL:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
return intl::UnicodeProperties::FoldCase(aCh);
|
||||
}
|
||||
|
||||
inline bool IsDefaultIgnorable(uint32_t aCh) {
|
||||
return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT);
|
||||
return intl::UnicodeProperties::HasBinaryProperty(
|
||||
aCh, intl::UnicodeProperties::BinaryProperty::DefaultIgnorableCodePoint);
|
||||
}
|
||||
|
||||
inline EmojiPresentation GetEmojiPresentation(uint32_t aCh) {
|
||||
if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) {
|
||||
if (!intl::UnicodeProperties::HasBinaryProperty(
|
||||
aCh, intl::UnicodeProperties::BinaryProperty::Emoji)) {
|
||||
return TextOnly;
|
||||
}
|
||||
|
||||
if (u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)) {
|
||||
if (intl::UnicodeProperties::HasBinaryProperty(
|
||||
aCh, intl::UnicodeProperties::BinaryProperty::EmojiPresentation)) {
|
||||
return EmojiDefault;
|
||||
}
|
||||
return TextDefault;
|
||||
|
@ -271,11 +209,6 @@ uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength);
|
|||
// to the values we care about at runtime.
|
||||
bool IsCombiningDiacritic(uint32_t aCh);
|
||||
|
||||
// Keep this function in sync with is_math_symbol in base_chars.py.
|
||||
inline bool IsMathOrMusicSymbol(uint32_t aCh) {
|
||||
return u_charType(aCh) == U_MATH_SYMBOL || u_charType(aCh) == U_OTHER_SYMBOL;
|
||||
}
|
||||
|
||||
// Remove diacritics from a character
|
||||
uint32_t GetNaked(uint32_t aCh);
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "mozilla/intl/Script.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
|
||||
// Currently we use the non-transitional processing option -- see
|
||||
// http://unicode.org/reports/tr46/
|
||||
|
@ -774,7 +775,8 @@ bool nsIDNService::isLabelSafe(const nsAString& label) {
|
|||
// Check for mixed numbering systems
|
||||
auto genCat = GetGeneralCategory(ch);
|
||||
if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
|
||||
uint32_t zeroCharacter = ch - GetNumericValue(ch);
|
||||
uint32_t zeroCharacter =
|
||||
ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
|
||||
if (savedNumberingSystem == 0) {
|
||||
// If we encounter a decimal number, save the zero character from that
|
||||
// numbering system.
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "mozilla/dom/HTMLOptionElement.h"
|
||||
#include "mozilla/dom/HTMLSelectElement.h"
|
||||
#include "mozilla/dom/Text.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
#include "mozilla/intl/WordBreaker.h"
|
||||
#include "mozilla/StaticPrefs_browser.h"
|
||||
|
||||
|
@ -775,7 +776,7 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
|
|||
// already guaranteed to not be a combining diacritical mark.)
|
||||
c = (t2b ? DecodeChar(t2b, &findex) : CHAR_TO_UNICHAR(t1b[findex]));
|
||||
if (!mMatchDiacritics && IsCombiningDiacritic(c) &&
|
||||
!IsMathOrMusicSymbol(prevChar)) {
|
||||
!intl::UnicodeProperties::IsMathOrMusicSymbol(prevChar)) {
|
||||
continue;
|
||||
}
|
||||
patc = DecodeChar(patStr, &pindex);
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "nsUnicodeProperties.h"
|
||||
#include "mozilla/intl/UnicodeProperties.h"
|
||||
|
||||
// Verify the assertion in SQLFunctions.cpp / nextSearchCandidate that the
|
||||
// only non-ASCII characters that lower-case to ASCII ones are:
|
||||
|
@ -15,7 +15,7 @@ TEST(MatchAutocompleteCasing, CaseAssumption)
|
|||
{
|
||||
for (uint32_t c = 128; c < 0x110000; c++) {
|
||||
if (c != 304 && c != 8490) {
|
||||
ASSERT_GE(mozilla::unicode::GetLowercase(c), 128U);
|
||||
ASSERT_GE(mozilla::intl::UnicodeProperties::ToLower(c), 128U);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,6 @@ TEST(MatchAutocompleteCasing, CaseAssumption)
|
|||
TEST(MatchAutocompleteCasing, CaseAssumption2)
|
||||
{
|
||||
for (uint32_t c = 0; c < 128; c++) {
|
||||
ASSERT_LT(mozilla::unicode::GetLowercase(c), 128U);
|
||||
ASSERT_LT(mozilla::intl::UnicodeProperties::ToLower(c), 128U);
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче