зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1569613 - Add surrogate pair handling API to `nsTextFragment` r=hsivonen
We check surrogate pair at specific index in `nsTextFragement` in a lot of places. This requires boundary check of the index so that it can cause security issue and crash reason with simple mistake, and also it steals your time to understand the code what it does especially when it's a part of an `if` condition. Therefore, this patch adds new API to `nsTextFragment` and makes the all surrogate pair handlers of `nsTextFragument` use new API. Differential Revision: https://phabricator.services.mozilla.com/D39689 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Родитель
c6ce52db91
Коммит
f3fb0d1af8
|
@ -1470,22 +1470,6 @@ bool nsContentUtils::IsFirstLetterPunctuation(uint32_t aChar) {
|
|||
}
|
||||
}
|
||||
|
||||
// static
|
||||
bool nsContentUtils::IsFirstLetterPunctuationAt(const nsTextFragment* aFrag,
|
||||
uint32_t aOffset) {
|
||||
char16_t h = aFrag->CharAt(aOffset);
|
||||
if (!IS_SURROGATE(h)) {
|
||||
return IsFirstLetterPunctuation(h);
|
||||
}
|
||||
if (NS_IS_HIGH_SURROGATE(h) && aOffset + 1 < aFrag->GetLength()) {
|
||||
char16_t l = aFrag->CharAt(aOffset + 1);
|
||||
if (NS_IS_LOW_SURROGATE(l)) {
|
||||
return IsFirstLetterPunctuation(SURROGATE_TO_UCS4(h, l));
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// static
|
||||
bool nsContentUtils::IsAlphanumeric(uint32_t aChar) {
|
||||
nsUGenCategory cat = mozilla::unicode::GetGenCategory(aChar);
|
||||
|
|
|
@ -547,8 +547,6 @@ class nsContentUtils {
|
|||
* Returns true if aChar is of class Ps, Pi, Po, Pf, or Pe.
|
||||
*/
|
||||
static bool IsFirstLetterPunctuation(uint32_t aChar);
|
||||
static bool IsFirstLetterPunctuationAt(const nsTextFragment* aFrag,
|
||||
uint32_t aOffset);
|
||||
|
||||
/**
|
||||
* Returns true if aChar is of class Lu, Ll, Lt, Lm, Lo, Nd, Nl or No
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "mozilla/Attributes.h"
|
||||
#include "mozilla/MemoryReporting.h"
|
||||
|
||||
#include "nsCharTraits.h"
|
||||
#include "nsString.h"
|
||||
#include "nsStringBuffer.h"
|
||||
#include "nsReadableUtils.h"
|
||||
|
@ -216,6 +217,59 @@ class nsTextFragment final {
|
|||
: static_cast<unsigned char>(m1b[aIndex]);
|
||||
}
|
||||
|
||||
/**
|
||||
* IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at
|
||||
* aIndex is high surrogate and it's followed by low surrogate.
|
||||
*/
|
||||
inline bool IsHighSurrogateFollowedByLowSurrogateAt(int32_t aIndex) const {
|
||||
MOZ_ASSERT(aIndex >= 0);
|
||||
MOZ_ASSERT(aIndex < mState.mLength);
|
||||
if (!mState.mIs2b || aIndex + 1 >= mState.mLength) {
|
||||
return false;
|
||||
}
|
||||
return NS_IS_HIGH_SURROGATE(Get2b()[aIndex]) &&
|
||||
NS_IS_LOW_SURROGATE(Get2b()[aIndex + 1]);
|
||||
}
|
||||
|
||||
/**
|
||||
* IsLowSurrogateFollowingHighSurrogateAt() returns true if character at
|
||||
* aIndex is low surrogate and it follows high surrogate.
|
||||
*/
|
||||
inline bool IsLowSurrogateFollowingHighSurrogateAt(int32_t aIndex) const {
|
||||
MOZ_ASSERT(aIndex >= 0);
|
||||
MOZ_ASSERT(aIndex < mState.mLength);
|
||||
if (!mState.mIs2b || aIndex <= 0) {
|
||||
return false;
|
||||
}
|
||||
return NS_IS_LOW_SURROGATE(Get2b()[aIndex]) &&
|
||||
NS_IS_HIGH_SURROGATE(Get2b()[aIndex - 1]);
|
||||
}
|
||||
|
||||
/**
|
||||
* ScalarValueAt() returns a Unicode scalar value at aIndex. If the character
|
||||
* at aIndex is a high surrogate followed by low surrogate, returns character
|
||||
* code for the pair. If the index is low surrogate, or a high surrogate but
|
||||
* not in a pair, returns 0.
|
||||
*/
|
||||
inline char32_t ScalarValueAt(int32_t aIndex) const {
|
||||
MOZ_ASSERT(aIndex >= 0);
|
||||
MOZ_ASSERT(aIndex < mState.mLength);
|
||||
if (!mState.mIs2b) {
|
||||
return static_cast<unsigned char>(m1b[aIndex]);
|
||||
}
|
||||
char16_t ch = Get2b()[aIndex];
|
||||
if (!IS_SURROGATE(ch)) {
|
||||
return ch;
|
||||
}
|
||||
if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) {
|
||||
char16_t nextCh = Get2b()[aIndex + 1];
|
||||
if (NS_IS_LOW_SURROGATE(nextCh)) {
|
||||
return SURROGATE_TO_UCS4(ch, nextCh);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; }
|
||||
|
||||
struct FragmentBits {
|
||||
|
|
|
@ -991,8 +991,7 @@ nsresult ContentEventHandler::ExpandToClusterBoundary(nsIContent* aContent,
|
|||
// If the frame isn't available, we only can check surrogate pair...
|
||||
const nsTextFragment* text = &aContent->AsText()->TextFragment();
|
||||
NS_ENSURE_TRUE(text, NS_ERROR_FAILURE);
|
||||
if (NS_IS_LOW_SURROGATE(text->CharAt(*aXPOffset)) &&
|
||||
NS_IS_HIGH_SURROGATE(text->CharAt(*aXPOffset - 1))) {
|
||||
if (text->IsLowSurrogateFollowingHighSurrogateAt(*aXPOffset)) {
|
||||
*aXPOffset += aForward ? 1 : -1;
|
||||
}
|
||||
return NS_OK;
|
||||
|
|
|
@ -2448,8 +2448,7 @@ nsresult HTMLEditRules::WillDeleteSelection(
|
|||
// Bug 1068979: delete both codepoints if surrogate pair
|
||||
if (so > 0) {
|
||||
const nsTextFragment* text = &nodeAsText->TextFragment();
|
||||
if (NS_IS_LOW_SURROGATE(text->CharAt(so)) &&
|
||||
NS_IS_HIGH_SURROGATE(text->CharAt(so - 1))) {
|
||||
if (text->IsLowSurrogateFollowingHighSurrogateAt(so)) {
|
||||
so--;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -500,8 +500,8 @@ nsresult TextEditor::ExtendSelectionForDelete(nsIEditor::EDirection* aAction) {
|
|||
const nsTextFragment* data =
|
||||
&insertionPoint.GetContainerAsText()->TextFragment();
|
||||
uint32_t offset = insertionPoint.Offset();
|
||||
if ((offset > 1 && NS_IS_LOW_SURROGATE(data->CharAt(offset - 1)) &&
|
||||
NS_IS_HIGH_SURROGATE(data->CharAt(offset - 2))) ||
|
||||
if ((offset > 1 &&
|
||||
data->IsLowSurrogateFollowingHighSurrogateAt(offset - 1)) ||
|
||||
(offset > 0 &&
|
||||
gfxFontUtils::IsVarSelector(data->CharAt(offset - 1)))) {
|
||||
nsresult rv = selCont->CharacterExtendForBackspace();
|
||||
|
@ -2188,8 +2188,7 @@ nsresult TextEditor::SetUnmaskRangeInternal(uint32_t aStart, uint32_t aLength,
|
|||
// preceding high surrogate because the caller may want to show a
|
||||
// character before the character at `aStart + 1`.
|
||||
const nsTextFragment* textFragment = text->GetText();
|
||||
if (aStart > 0 && NS_IS_LOW_SURROGATE(textFragment->CharAt(aStart)) &&
|
||||
NS_IS_HIGH_SURROGATE(textFragment->CharAt(aStart - 1))) {
|
||||
if (textFragment->IsLowSurrogateFollowingHighSurrogateAt(aStart)) {
|
||||
mUnmaskedStart = aStart - 1;
|
||||
// If caller collapses the range, keep it. Otherwise, expand the length.
|
||||
if (aLength > 0) {
|
||||
|
@ -2202,11 +2201,8 @@ nsresult TextEditor::SetUnmaskRangeInternal(uint32_t aStart, uint32_t aLength,
|
|||
// If unmasked end is middle of a surrogate pair, expand it to include
|
||||
// the following low surrogate because the caller may want to show a
|
||||
// character after the character at `aStart + aLength`.
|
||||
if (mUnmaskedLength > 0 && mUnmaskedStart + mUnmaskedLength < valueLength &&
|
||||
NS_IS_HIGH_SURROGATE(
|
||||
textFragment->CharAt(mUnmaskedStart + mUnmaskedLength - 1)) &&
|
||||
NS_IS_LOW_SURROGATE(
|
||||
textFragment->CharAt(mUnmaskedStart + mUnmaskedLength))) {
|
||||
if (UnmaskedEnd() < valueLength &&
|
||||
textFragment->IsLowSurrogateFollowingHighSurrogateAt(UnmaskedEnd())) {
|
||||
++mUnmaskedLength;
|
||||
}
|
||||
// If it's first time to mask the unmasking characters with timer, create
|
||||
|
|
|
@ -3100,16 +3100,15 @@ static bool IsJustifiableCharacter(const nsStyleText* aTextStyle,
|
|||
(0xff5eu <= ch && ch <= 0xff9fu)) {
|
||||
return true;
|
||||
}
|
||||
char16_t ch2;
|
||||
if (NS_IS_HIGH_SURROGATE(ch) && aFrag->GetLength() > uint32_t(aPos) + 1 &&
|
||||
NS_IS_LOW_SURROGATE(ch2 = aFrag->CharAt(aPos + 1))) {
|
||||
uint32_t u = SURROGATE_TO_UCS4(ch, ch2);
|
||||
// CJK Unified Ideographs Extension B,
|
||||
// CJK Unified Ideographs Extension C,
|
||||
// CJK Unified Ideographs Extension D,
|
||||
// CJK Compatibility Ideographs Supplement
|
||||
if (0x20000u <= u && u <= 0x2ffffu) {
|
||||
return true;
|
||||
if (NS_IS_HIGH_SURROGATE(ch)) {
|
||||
if (char32_t u = aFrag->ScalarValueAt(aPos)) {
|
||||
// CJK Unified Ideographs Extension B,
|
||||
// CJK Unified Ideographs Extension C,
|
||||
// CJK Unified Ideographs Extension D,
|
||||
// CJK Compatibility Ideographs Supplement
|
||||
if (0x20000u <= u && u <= 0x2ffffu) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7218,11 +7217,8 @@ nsIFrame::ContentOffsets nsTextFrame::GetCharacterOffsetAtFramePointInternal(
|
|||
// chars that are ligated in the textrun to form a single flag symbol.
|
||||
uint32_t offs = extraCluster.GetOriginalOffset();
|
||||
const nsTextFragment* frag = TextFragment();
|
||||
if (offs + 1 < frag->GetLength() &&
|
||||
NS_IS_HIGH_SURROGATE(frag->CharAt(offs)) &&
|
||||
NS_IS_LOW_SURROGATE(frag->CharAt(offs + 1)) &&
|
||||
gfxFontUtils::IsRegionalIndicator(
|
||||
SURROGATE_TO_UCS4(frag->CharAt(offs), frag->CharAt(offs + 1)))) {
|
||||
if (frag->IsHighSurrogateFollowedByLowSurrogateAt(offs) &&
|
||||
gfxFontUtils::IsRegionalIndicator(frag->ScalarValueAt(offs))) {
|
||||
allowSplitLigature = false;
|
||||
if (extraCluster.GetSkippedOffset() > 1 &&
|
||||
!mTextRun->IsLigatureGroupStart(extraCluster.GetSkippedOffset())) {
|
||||
|
@ -7732,8 +7728,7 @@ static bool IsAcceptableCaretPosition(const gfxSkipCharsIterator& aIter,
|
|||
uint32_t ch = frag->CharAt(offs);
|
||||
|
||||
if (gfxFontUtils::IsVarSelector(ch) ||
|
||||
(NS_IS_LOW_SURROGATE(ch) && offs > 0 &&
|
||||
NS_IS_HIGH_SURROGATE(frag->CharAt(offs - 1))) ||
|
||||
frag->IsLowSurrogateFollowingHighSurrogateAt(offs) ||
|
||||
(!aTextRun->IsLigatureGroupStart(index) &&
|
||||
(unicode::GetEmojiPresentation(ch) == unicode::EmojiDefault ||
|
||||
(unicode::GetEmojiPresentation(ch) == unicode::TextDefault &&
|
||||
|
@ -7744,17 +7739,15 @@ static bool IsAcceptableCaretPosition(const gfxSkipCharsIterator& aIter,
|
|||
|
||||
// If the proposed position is before a high surrogate, we need to decode
|
||||
// the surrogate pair (if valid) and check the resulting character.
|
||||
if (NS_IS_HIGH_SURROGATE(ch) && offs + 1 < frag->GetLength()) {
|
||||
uint32_t ch2 = frag->CharAt(offs + 1);
|
||||
if (NS_IS_LOW_SURROGATE(ch2)) {
|
||||
ch = SURROGATE_TO_UCS4(ch, ch2);
|
||||
if (NS_IS_HIGH_SURROGATE(ch)) {
|
||||
if (char32_t ucs4 = frag->ScalarValueAt(offs)) {
|
||||
// If the character is a (Plane-14) variation selector,
|
||||
// or an emoji character that is ligated with the previous
|
||||
// character (i.e. part of a Regional-Indicator flag pair,
|
||||
// or an emoji-ZWJ sequence), this is not a valid boundary.
|
||||
if (gfxFontUtils::IsVarSelector(ch) ||
|
||||
if (gfxFontUtils::IsVarSelector(ucs4) ||
|
||||
(!aTextRun->IsLigatureGroupStart(index) &&
|
||||
unicode::GetEmojiPresentation(ch) == unicode::EmojiDefault)) {
|
||||
unicode::GetEmojiPresentation(ucs4) == unicode::EmojiDefault)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -7861,9 +7854,7 @@ bool ClusterIterator::IsPunctuation() {
|
|||
}
|
||||
|
||||
int32_t ClusterIterator::GetAfterInternal() {
|
||||
if (mFrag->Is2b() && NS_IS_HIGH_SURROGATE(mFrag->Get2b()[mCharIndex]) &&
|
||||
uint32_t(mCharIndex) + 1 < mFrag->GetLength() &&
|
||||
NS_IS_LOW_SURROGATE(mFrag->Get2b()[mCharIndex + 1])) {
|
||||
if (mFrag->IsHighSurrogateFollowedByLowSurrogateAt(mCharIndex)) {
|
||||
return mCharIndex + 2;
|
||||
}
|
||||
return mCharIndex + 1;
|
||||
|
@ -7928,16 +7919,14 @@ ClusterIterator::ClusterIterator(nsTextFrame* aTextFrame, int32_t aPosition,
|
|||
nsString maskedText;
|
||||
maskedText.SetCapacity(mFrag->GetLength());
|
||||
for (uint32_t i = 0; i < mFrag->GetLength(); ++i) {
|
||||
uint32_t ch = mFrag->CharAt(i);
|
||||
mIterator.SetOriginalOffset(i);
|
||||
uint32_t skippedOffset = mIterator.GetSkippedOffset();
|
||||
if (NS_IS_HIGH_SURROGATE(ch) && i < mFrag->GetLength() - 1 &&
|
||||
NS_IS_LOW_SURROGATE(mFrag->CharAt(i + 1))) {
|
||||
if (mFrag->IsHighSurrogateFollowedByLowSurrogateAt(i)) {
|
||||
if (transformedTextRun->mStyles[skippedOffset]->mMaskPassword) {
|
||||
maskedText.Append(kPasswordMask);
|
||||
maskedText.Append(kPasswordMask);
|
||||
} else {
|
||||
maskedText.Append(ch);
|
||||
maskedText.Append(mFrag->CharAt(i));
|
||||
maskedText.Append(mFrag->CharAt(i + 1));
|
||||
}
|
||||
++i;
|
||||
|
@ -7945,7 +7934,7 @@ ClusterIterator::ClusterIterator(nsTextFrame* aTextFrame, int32_t aPosition,
|
|||
maskedText.Append(
|
||||
transformedTextRun->mStyles[skippedOffset]->mMaskPassword
|
||||
? kPasswordMask
|
||||
: ch);
|
||||
: mFrag->CharAt(i));
|
||||
}
|
||||
}
|
||||
mMaskedFrag.SetTo(maskedText, mFrag->IsBidi(), true);
|
||||
|
@ -8085,7 +8074,8 @@ static int32_t FindEndOfPunctuationRun(const nsTextFragment* aFrag,
|
|||
int32_t i;
|
||||
|
||||
for (i = aStart; i < aEnd - aOffset; ++i) {
|
||||
if (nsContentUtils::IsFirstLetterPunctuationAt(aFrag, aOffset + i)) {
|
||||
if (nsContentUtils::IsFirstLetterPunctuation(
|
||||
aFrag->ScalarValueAt(aOffset + i))) {
|
||||
aIter->SetOriginalOffset(aOffset + i);
|
||||
FindClusterEnd(aTextRun, aEnd, aIter);
|
||||
i = aIter->GetOriginalOffset() - aOffset;
|
||||
|
|
Загрузка…
Ссылка в новой задаче