Bug 1569613 - Add surrogate pair handling API to `nsTextFragment` r=hsivonen

We check surrogate pair at specific index in `nsTextFragement` in a lot of
places.  This requires boundary check of the index so that it can cause
security issue and crash reason with simple mistake, and also it steals
your time to understand the code what it does especially when it's a
part of an `if` condition.

Therefore, this patch adds new API to `nsTextFragment` and makes the all
surrogate pair handlers of `nsTextFragument` use new API.

Differential Revision: https://phabricator.services.mozilla.com/D39689

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Masayuki Nakano 2019-08-06 05:43:21 +00:00
Родитель c6ce52db91
Коммит f3fb0d1af8
7 изменённых файлов: 83 добавлений и 63 удалений

Просмотреть файл

@ -1470,22 +1470,6 @@ bool nsContentUtils::IsFirstLetterPunctuation(uint32_t aChar) {
}
}
// static
bool nsContentUtils::IsFirstLetterPunctuationAt(const nsTextFragment* aFrag,
uint32_t aOffset) {
char16_t h = aFrag->CharAt(aOffset);
if (!IS_SURROGATE(h)) {
return IsFirstLetterPunctuation(h);
}
if (NS_IS_HIGH_SURROGATE(h) && aOffset + 1 < aFrag->GetLength()) {
char16_t l = aFrag->CharAt(aOffset + 1);
if (NS_IS_LOW_SURROGATE(l)) {
return IsFirstLetterPunctuation(SURROGATE_TO_UCS4(h, l));
}
}
return false;
}
// static
bool nsContentUtils::IsAlphanumeric(uint32_t aChar) {
nsUGenCategory cat = mozilla::unicode::GetGenCategory(aChar);

Просмотреть файл

@ -547,8 +547,6 @@ class nsContentUtils {
* Returns true if aChar is of class Ps, Pi, Po, Pf, or Pe.
*/
static bool IsFirstLetterPunctuation(uint32_t aChar);
static bool IsFirstLetterPunctuationAt(const nsTextFragment* aFrag,
uint32_t aOffset);
/**
* Returns true if aChar is of class Lu, Ll, Lt, Lm, Lo, Nd, Nl or No

Просмотреть файл

@ -16,6 +16,7 @@
#include "mozilla/Attributes.h"
#include "mozilla/MemoryReporting.h"
#include "nsCharTraits.h"
#include "nsString.h"
#include "nsStringBuffer.h"
#include "nsReadableUtils.h"
@ -216,6 +217,59 @@ class nsTextFragment final {
: static_cast<unsigned char>(m1b[aIndex]);
}
/**
* IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at
* aIndex is high surrogate and it's followed by low surrogate.
*/
inline bool IsHighSurrogateFollowedByLowSurrogateAt(int32_t aIndex) const {
MOZ_ASSERT(aIndex >= 0);
MOZ_ASSERT(aIndex < mState.mLength);
if (!mState.mIs2b || aIndex + 1 >= mState.mLength) {
return false;
}
return NS_IS_HIGH_SURROGATE(Get2b()[aIndex]) &&
NS_IS_LOW_SURROGATE(Get2b()[aIndex + 1]);
}
/**
* IsLowSurrogateFollowingHighSurrogateAt() returns true if character at
* aIndex is low surrogate and it follows high surrogate.
*/
inline bool IsLowSurrogateFollowingHighSurrogateAt(int32_t aIndex) const {
MOZ_ASSERT(aIndex >= 0);
MOZ_ASSERT(aIndex < mState.mLength);
if (!mState.mIs2b || aIndex <= 0) {
return false;
}
return NS_IS_LOW_SURROGATE(Get2b()[aIndex]) &&
NS_IS_HIGH_SURROGATE(Get2b()[aIndex - 1]);
}
/**
* ScalarValueAt() returns a Unicode scalar value at aIndex. If the character
* at aIndex is a high surrogate followed by low surrogate, returns character
* code for the pair. If the index is low surrogate, or a high surrogate but
* not in a pair, returns 0.
*/
inline char32_t ScalarValueAt(int32_t aIndex) const {
MOZ_ASSERT(aIndex >= 0);
MOZ_ASSERT(aIndex < mState.mLength);
if (!mState.mIs2b) {
return static_cast<unsigned char>(m1b[aIndex]);
}
char16_t ch = Get2b()[aIndex];
if (!IS_SURROGATE(ch)) {
return ch;
}
if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) {
char16_t nextCh = Get2b()[aIndex + 1];
if (NS_IS_LOW_SURROGATE(nextCh)) {
return SURROGATE_TO_UCS4(ch, nextCh);
}
}
return 0;
}
void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; }
struct FragmentBits {

Просмотреть файл

@ -991,8 +991,7 @@ nsresult ContentEventHandler::ExpandToClusterBoundary(nsIContent* aContent,
// If the frame isn't available, we only can check surrogate pair...
const nsTextFragment* text = &aContent->AsText()->TextFragment();
NS_ENSURE_TRUE(text, NS_ERROR_FAILURE);
if (NS_IS_LOW_SURROGATE(text->CharAt(*aXPOffset)) &&
NS_IS_HIGH_SURROGATE(text->CharAt(*aXPOffset - 1))) {
if (text->IsLowSurrogateFollowingHighSurrogateAt(*aXPOffset)) {
*aXPOffset += aForward ? 1 : -1;
}
return NS_OK;

Просмотреть файл

@ -2448,8 +2448,7 @@ nsresult HTMLEditRules::WillDeleteSelection(
// Bug 1068979: delete both codepoints if surrogate pair
if (so > 0) {
const nsTextFragment* text = &nodeAsText->TextFragment();
if (NS_IS_LOW_SURROGATE(text->CharAt(so)) &&
NS_IS_HIGH_SURROGATE(text->CharAt(so - 1))) {
if (text->IsLowSurrogateFollowingHighSurrogateAt(so)) {
so--;
}
}

Просмотреть файл

@ -500,8 +500,8 @@ nsresult TextEditor::ExtendSelectionForDelete(nsIEditor::EDirection* aAction) {
const nsTextFragment* data =
&insertionPoint.GetContainerAsText()->TextFragment();
uint32_t offset = insertionPoint.Offset();
if ((offset > 1 && NS_IS_LOW_SURROGATE(data->CharAt(offset - 1)) &&
NS_IS_HIGH_SURROGATE(data->CharAt(offset - 2))) ||
if ((offset > 1 &&
data->IsLowSurrogateFollowingHighSurrogateAt(offset - 1)) ||
(offset > 0 &&
gfxFontUtils::IsVarSelector(data->CharAt(offset - 1)))) {
nsresult rv = selCont->CharacterExtendForBackspace();
@ -2188,8 +2188,7 @@ nsresult TextEditor::SetUnmaskRangeInternal(uint32_t aStart, uint32_t aLength,
// preceding high surrogate because the caller may want to show a
// character before the character at `aStart + 1`.
const nsTextFragment* textFragment = text->GetText();
if (aStart > 0 && NS_IS_LOW_SURROGATE(textFragment->CharAt(aStart)) &&
NS_IS_HIGH_SURROGATE(textFragment->CharAt(aStart - 1))) {
if (textFragment->IsLowSurrogateFollowingHighSurrogateAt(aStart)) {
mUnmaskedStart = aStart - 1;
// If caller collapses the range, keep it. Otherwise, expand the length.
if (aLength > 0) {
@ -2202,11 +2201,8 @@ nsresult TextEditor::SetUnmaskRangeInternal(uint32_t aStart, uint32_t aLength,
// If unmasked end is middle of a surrogate pair, expand it to include
// the following low surrogate because the caller may want to show a
// character after the character at `aStart + aLength`.
if (mUnmaskedLength > 0 && mUnmaskedStart + mUnmaskedLength < valueLength &&
NS_IS_HIGH_SURROGATE(
textFragment->CharAt(mUnmaskedStart + mUnmaskedLength - 1)) &&
NS_IS_LOW_SURROGATE(
textFragment->CharAt(mUnmaskedStart + mUnmaskedLength))) {
if (UnmaskedEnd() < valueLength &&
textFragment->IsLowSurrogateFollowingHighSurrogateAt(UnmaskedEnd())) {
++mUnmaskedLength;
}
// If it's first time to mask the unmasking characters with timer, create

Просмотреть файл

@ -3100,16 +3100,15 @@ static bool IsJustifiableCharacter(const nsStyleText* aTextStyle,
(0xff5eu <= ch && ch <= 0xff9fu)) {
return true;
}
char16_t ch2;
if (NS_IS_HIGH_SURROGATE(ch) && aFrag->GetLength() > uint32_t(aPos) + 1 &&
NS_IS_LOW_SURROGATE(ch2 = aFrag->CharAt(aPos + 1))) {
uint32_t u = SURROGATE_TO_UCS4(ch, ch2);
// CJK Unified Ideographs Extension B,
// CJK Unified Ideographs Extension C,
// CJK Unified Ideographs Extension D,
// CJK Compatibility Ideographs Supplement
if (0x20000u <= u && u <= 0x2ffffu) {
return true;
if (NS_IS_HIGH_SURROGATE(ch)) {
if (char32_t u = aFrag->ScalarValueAt(aPos)) {
// CJK Unified Ideographs Extension B,
// CJK Unified Ideographs Extension C,
// CJK Unified Ideographs Extension D,
// CJK Compatibility Ideographs Supplement
if (0x20000u <= u && u <= 0x2ffffu) {
return true;
}
}
}
}
@ -7218,11 +7217,8 @@ nsIFrame::ContentOffsets nsTextFrame::GetCharacterOffsetAtFramePointInternal(
// chars that are ligated in the textrun to form a single flag symbol.
uint32_t offs = extraCluster.GetOriginalOffset();
const nsTextFragment* frag = TextFragment();
if (offs + 1 < frag->GetLength() &&
NS_IS_HIGH_SURROGATE(frag->CharAt(offs)) &&
NS_IS_LOW_SURROGATE(frag->CharAt(offs + 1)) &&
gfxFontUtils::IsRegionalIndicator(
SURROGATE_TO_UCS4(frag->CharAt(offs), frag->CharAt(offs + 1)))) {
if (frag->IsHighSurrogateFollowedByLowSurrogateAt(offs) &&
gfxFontUtils::IsRegionalIndicator(frag->ScalarValueAt(offs))) {
allowSplitLigature = false;
if (extraCluster.GetSkippedOffset() > 1 &&
!mTextRun->IsLigatureGroupStart(extraCluster.GetSkippedOffset())) {
@ -7732,8 +7728,7 @@ static bool IsAcceptableCaretPosition(const gfxSkipCharsIterator& aIter,
uint32_t ch = frag->CharAt(offs);
if (gfxFontUtils::IsVarSelector(ch) ||
(NS_IS_LOW_SURROGATE(ch) && offs > 0 &&
NS_IS_HIGH_SURROGATE(frag->CharAt(offs - 1))) ||
frag->IsLowSurrogateFollowingHighSurrogateAt(offs) ||
(!aTextRun->IsLigatureGroupStart(index) &&
(unicode::GetEmojiPresentation(ch) == unicode::EmojiDefault ||
(unicode::GetEmojiPresentation(ch) == unicode::TextDefault &&
@ -7744,17 +7739,15 @@ static bool IsAcceptableCaretPosition(const gfxSkipCharsIterator& aIter,
// If the proposed position is before a high surrogate, we need to decode
// the surrogate pair (if valid) and check the resulting character.
if (NS_IS_HIGH_SURROGATE(ch) && offs + 1 < frag->GetLength()) {
uint32_t ch2 = frag->CharAt(offs + 1);
if (NS_IS_LOW_SURROGATE(ch2)) {
ch = SURROGATE_TO_UCS4(ch, ch2);
if (NS_IS_HIGH_SURROGATE(ch)) {
if (char32_t ucs4 = frag->ScalarValueAt(offs)) {
// If the character is a (Plane-14) variation selector,
// or an emoji character that is ligated with the previous
// character (i.e. part of a Regional-Indicator flag pair,
// or an emoji-ZWJ sequence), this is not a valid boundary.
if (gfxFontUtils::IsVarSelector(ch) ||
if (gfxFontUtils::IsVarSelector(ucs4) ||
(!aTextRun->IsLigatureGroupStart(index) &&
unicode::GetEmojiPresentation(ch) == unicode::EmojiDefault)) {
unicode::GetEmojiPresentation(ucs4) == unicode::EmojiDefault)) {
return false;
}
}
@ -7861,9 +7854,7 @@ bool ClusterIterator::IsPunctuation() {
}
int32_t ClusterIterator::GetAfterInternal() {
if (mFrag->Is2b() && NS_IS_HIGH_SURROGATE(mFrag->Get2b()[mCharIndex]) &&
uint32_t(mCharIndex) + 1 < mFrag->GetLength() &&
NS_IS_LOW_SURROGATE(mFrag->Get2b()[mCharIndex + 1])) {
if (mFrag->IsHighSurrogateFollowedByLowSurrogateAt(mCharIndex)) {
return mCharIndex + 2;
}
return mCharIndex + 1;
@ -7928,16 +7919,14 @@ ClusterIterator::ClusterIterator(nsTextFrame* aTextFrame, int32_t aPosition,
nsString maskedText;
maskedText.SetCapacity(mFrag->GetLength());
for (uint32_t i = 0; i < mFrag->GetLength(); ++i) {
uint32_t ch = mFrag->CharAt(i);
mIterator.SetOriginalOffset(i);
uint32_t skippedOffset = mIterator.GetSkippedOffset();
if (NS_IS_HIGH_SURROGATE(ch) && i < mFrag->GetLength() - 1 &&
NS_IS_LOW_SURROGATE(mFrag->CharAt(i + 1))) {
if (mFrag->IsHighSurrogateFollowedByLowSurrogateAt(i)) {
if (transformedTextRun->mStyles[skippedOffset]->mMaskPassword) {
maskedText.Append(kPasswordMask);
maskedText.Append(kPasswordMask);
} else {
maskedText.Append(ch);
maskedText.Append(mFrag->CharAt(i));
maskedText.Append(mFrag->CharAt(i + 1));
}
++i;
@ -7945,7 +7934,7 @@ ClusterIterator::ClusterIterator(nsTextFrame* aTextFrame, int32_t aPosition,
maskedText.Append(
transformedTextRun->mStyles[skippedOffset]->mMaskPassword
? kPasswordMask
: ch);
: mFrag->CharAt(i));
}
}
mMaskedFrag.SetTo(maskedText, mFrag->IsBidi(), true);
@ -8085,7 +8074,8 @@ static int32_t FindEndOfPunctuationRun(const nsTextFragment* aFrag,
int32_t i;
for (i = aStart; i < aEnd - aOffset; ++i) {
if (nsContentUtils::IsFirstLetterPunctuationAt(aFrag, aOffset + i)) {
if (nsContentUtils::IsFirstLetterPunctuation(
aFrag->ScalarValueAt(aOffset + i))) {
aIter->SetOriginalOffset(aOffset + i);
FindClusterEnd(aTextRun, aEnd, aIter);
i = aIter->GetOriginalOffset() - aOffset;