From f3fb0d1af82aea354568e96b460285ed06442c1b Mon Sep 17 00:00:00 2001 From: Masayuki Nakano Date: Tue, 6 Aug 2019 05:43:21 +0000 Subject: [PATCH] Bug 1569613 - Add surrogate pair handling API to `nsTextFragment` r=hsivonen We check surrogate pair at specific index in `nsTextFragement` in a lot of places. This requires boundary check of the index so that it can cause security issue and crash reason with simple mistake, and also it steals your time to understand the code what it does especially when it's a part of an `if` condition. Therefore, this patch adds new API to `nsTextFragment` and makes the all surrogate pair handlers of `nsTextFragument` use new API. Differential Revision: https://phabricator.services.mozilla.com/D39689 --HG-- extra : moz-landing-system : lando --- dom/base/nsContentUtils.cpp | 16 --------- dom/base/nsContentUtils.h | 2 -- dom/base/nsTextFragment.h | 54 ++++++++++++++++++++++++++++++ dom/events/ContentEventHandler.cpp | 3 +- editor/libeditor/HTMLEditRules.cpp | 3 +- editor/libeditor/TextEditor.cpp | 14 +++----- layout/generic/nsTextFrame.cpp | 54 ++++++++++++------------------ 7 files changed, 83 insertions(+), 63 deletions(-) diff --git a/dom/base/nsContentUtils.cpp b/dom/base/nsContentUtils.cpp index 821dd3d7840d..e403a5106cf8 100644 --- a/dom/base/nsContentUtils.cpp +++ b/dom/base/nsContentUtils.cpp @@ -1470,22 +1470,6 @@ bool nsContentUtils::IsFirstLetterPunctuation(uint32_t aChar) { } } -// static -bool nsContentUtils::IsFirstLetterPunctuationAt(const nsTextFragment* aFrag, - uint32_t aOffset) { - char16_t h = aFrag->CharAt(aOffset); - if (!IS_SURROGATE(h)) { - return IsFirstLetterPunctuation(h); - } - if (NS_IS_HIGH_SURROGATE(h) && aOffset + 1 < aFrag->GetLength()) { - char16_t l = aFrag->CharAt(aOffset + 1); - if (NS_IS_LOW_SURROGATE(l)) { - return IsFirstLetterPunctuation(SURROGATE_TO_UCS4(h, l)); - } - } - return false; -} - // static bool nsContentUtils::IsAlphanumeric(uint32_t aChar) { nsUGenCategory cat = mozilla::unicode::GetGenCategory(aChar); diff --git a/dom/base/nsContentUtils.h b/dom/base/nsContentUtils.h index 26e476cc5b13..5f15e3794525 100644 --- a/dom/base/nsContentUtils.h +++ b/dom/base/nsContentUtils.h @@ -547,8 +547,6 @@ class nsContentUtils { * Returns true if aChar is of class Ps, Pi, Po, Pf, or Pe. */ static bool IsFirstLetterPunctuation(uint32_t aChar); - static bool IsFirstLetterPunctuationAt(const nsTextFragment* aFrag, - uint32_t aOffset); /** * Returns true if aChar is of class Lu, Ll, Lt, Lm, Lo, Nd, Nl or No diff --git a/dom/base/nsTextFragment.h b/dom/base/nsTextFragment.h index 90162b50ef40..2c5438f6d27b 100644 --- a/dom/base/nsTextFragment.h +++ b/dom/base/nsTextFragment.h @@ -16,6 +16,7 @@ #include "mozilla/Attributes.h" #include "mozilla/MemoryReporting.h" +#include "nsCharTraits.h" #include "nsString.h" #include "nsStringBuffer.h" #include "nsReadableUtils.h" @@ -216,6 +217,59 @@ class nsTextFragment final { : static_cast(m1b[aIndex]); } + /** + * IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at + * aIndex is high surrogate and it's followed by low surrogate. + */ + inline bool IsHighSurrogateFollowedByLowSurrogateAt(int32_t aIndex) const { + MOZ_ASSERT(aIndex >= 0); + MOZ_ASSERT(aIndex < mState.mLength); + if (!mState.mIs2b || aIndex + 1 >= mState.mLength) { + return false; + } + return NS_IS_HIGH_SURROGATE(Get2b()[aIndex]) && + NS_IS_LOW_SURROGATE(Get2b()[aIndex + 1]); + } + + /** + * IsLowSurrogateFollowingHighSurrogateAt() returns true if character at + * aIndex is low surrogate and it follows high surrogate. + */ + inline bool IsLowSurrogateFollowingHighSurrogateAt(int32_t aIndex) const { + MOZ_ASSERT(aIndex >= 0); + MOZ_ASSERT(aIndex < mState.mLength); + if (!mState.mIs2b || aIndex <= 0) { + return false; + } + return NS_IS_LOW_SURROGATE(Get2b()[aIndex]) && + NS_IS_HIGH_SURROGATE(Get2b()[aIndex - 1]); + } + + /** + * ScalarValueAt() returns a Unicode scalar value at aIndex. If the character + * at aIndex is a high surrogate followed by low surrogate, returns character + * code for the pair. If the index is low surrogate, or a high surrogate but + * not in a pair, returns 0. + */ + inline char32_t ScalarValueAt(int32_t aIndex) const { + MOZ_ASSERT(aIndex >= 0); + MOZ_ASSERT(aIndex < mState.mLength); + if (!mState.mIs2b) { + return static_cast(m1b[aIndex]); + } + char16_t ch = Get2b()[aIndex]; + if (!IS_SURROGATE(ch)) { + return ch; + } + if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) { + char16_t nextCh = Get2b()[aIndex + 1]; + if (NS_IS_LOW_SURROGATE(nextCh)) { + return SURROGATE_TO_UCS4(ch, nextCh); + } + } + return 0; + } + void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; } struct FragmentBits { diff --git a/dom/events/ContentEventHandler.cpp b/dom/events/ContentEventHandler.cpp index bf75be2a52b6..ee5796166a12 100644 --- a/dom/events/ContentEventHandler.cpp +++ b/dom/events/ContentEventHandler.cpp @@ -991,8 +991,7 @@ nsresult ContentEventHandler::ExpandToClusterBoundary(nsIContent* aContent, // If the frame isn't available, we only can check surrogate pair... const nsTextFragment* text = &aContent->AsText()->TextFragment(); NS_ENSURE_TRUE(text, NS_ERROR_FAILURE); - if (NS_IS_LOW_SURROGATE(text->CharAt(*aXPOffset)) && - NS_IS_HIGH_SURROGATE(text->CharAt(*aXPOffset - 1))) { + if (text->IsLowSurrogateFollowingHighSurrogateAt(*aXPOffset)) { *aXPOffset += aForward ? 1 : -1; } return NS_OK; diff --git a/editor/libeditor/HTMLEditRules.cpp b/editor/libeditor/HTMLEditRules.cpp index c9d4618aff42..efe13f746272 100644 --- a/editor/libeditor/HTMLEditRules.cpp +++ b/editor/libeditor/HTMLEditRules.cpp @@ -2448,8 +2448,7 @@ nsresult HTMLEditRules::WillDeleteSelection( // Bug 1068979: delete both codepoints if surrogate pair if (so > 0) { const nsTextFragment* text = &nodeAsText->TextFragment(); - if (NS_IS_LOW_SURROGATE(text->CharAt(so)) && - NS_IS_HIGH_SURROGATE(text->CharAt(so - 1))) { + if (text->IsLowSurrogateFollowingHighSurrogateAt(so)) { so--; } } diff --git a/editor/libeditor/TextEditor.cpp b/editor/libeditor/TextEditor.cpp index 32f32185f186..df266f5b4b2d 100644 --- a/editor/libeditor/TextEditor.cpp +++ b/editor/libeditor/TextEditor.cpp @@ -500,8 +500,8 @@ nsresult TextEditor::ExtendSelectionForDelete(nsIEditor::EDirection* aAction) { const nsTextFragment* data = &insertionPoint.GetContainerAsText()->TextFragment(); uint32_t offset = insertionPoint.Offset(); - if ((offset > 1 && NS_IS_LOW_SURROGATE(data->CharAt(offset - 1)) && - NS_IS_HIGH_SURROGATE(data->CharAt(offset - 2))) || + if ((offset > 1 && + data->IsLowSurrogateFollowingHighSurrogateAt(offset - 1)) || (offset > 0 && gfxFontUtils::IsVarSelector(data->CharAt(offset - 1)))) { nsresult rv = selCont->CharacterExtendForBackspace(); @@ -2188,8 +2188,7 @@ nsresult TextEditor::SetUnmaskRangeInternal(uint32_t aStart, uint32_t aLength, // preceding high surrogate because the caller may want to show a // character before the character at `aStart + 1`. const nsTextFragment* textFragment = text->GetText(); - if (aStart > 0 && NS_IS_LOW_SURROGATE(textFragment->CharAt(aStart)) && - NS_IS_HIGH_SURROGATE(textFragment->CharAt(aStart - 1))) { + if (textFragment->IsLowSurrogateFollowingHighSurrogateAt(aStart)) { mUnmaskedStart = aStart - 1; // If caller collapses the range, keep it. Otherwise, expand the length. if (aLength > 0) { @@ -2202,11 +2201,8 @@ nsresult TextEditor::SetUnmaskRangeInternal(uint32_t aStart, uint32_t aLength, // If unmasked end is middle of a surrogate pair, expand it to include // the following low surrogate because the caller may want to show a // character after the character at `aStart + aLength`. - if (mUnmaskedLength > 0 && mUnmaskedStart + mUnmaskedLength < valueLength && - NS_IS_HIGH_SURROGATE( - textFragment->CharAt(mUnmaskedStart + mUnmaskedLength - 1)) && - NS_IS_LOW_SURROGATE( - textFragment->CharAt(mUnmaskedStart + mUnmaskedLength))) { + if (UnmaskedEnd() < valueLength && + textFragment->IsLowSurrogateFollowingHighSurrogateAt(UnmaskedEnd())) { ++mUnmaskedLength; } // If it's first time to mask the unmasking characters with timer, create diff --git a/layout/generic/nsTextFrame.cpp b/layout/generic/nsTextFrame.cpp index d5a22d680fd3..2aa595cfadae 100644 --- a/layout/generic/nsTextFrame.cpp +++ b/layout/generic/nsTextFrame.cpp @@ -3100,16 +3100,15 @@ static bool IsJustifiableCharacter(const nsStyleText* aTextStyle, (0xff5eu <= ch && ch <= 0xff9fu)) { return true; } - char16_t ch2; - if (NS_IS_HIGH_SURROGATE(ch) && aFrag->GetLength() > uint32_t(aPos) + 1 && - NS_IS_LOW_SURROGATE(ch2 = aFrag->CharAt(aPos + 1))) { - uint32_t u = SURROGATE_TO_UCS4(ch, ch2); - // CJK Unified Ideographs Extension B, - // CJK Unified Ideographs Extension C, - // CJK Unified Ideographs Extension D, - // CJK Compatibility Ideographs Supplement - if (0x20000u <= u && u <= 0x2ffffu) { - return true; + if (NS_IS_HIGH_SURROGATE(ch)) { + if (char32_t u = aFrag->ScalarValueAt(aPos)) { + // CJK Unified Ideographs Extension B, + // CJK Unified Ideographs Extension C, + // CJK Unified Ideographs Extension D, + // CJK Compatibility Ideographs Supplement + if (0x20000u <= u && u <= 0x2ffffu) { + return true; + } } } } @@ -7218,11 +7217,8 @@ nsIFrame::ContentOffsets nsTextFrame::GetCharacterOffsetAtFramePointInternal( // chars that are ligated in the textrun to form a single flag symbol. uint32_t offs = extraCluster.GetOriginalOffset(); const nsTextFragment* frag = TextFragment(); - if (offs + 1 < frag->GetLength() && - NS_IS_HIGH_SURROGATE(frag->CharAt(offs)) && - NS_IS_LOW_SURROGATE(frag->CharAt(offs + 1)) && - gfxFontUtils::IsRegionalIndicator( - SURROGATE_TO_UCS4(frag->CharAt(offs), frag->CharAt(offs + 1)))) { + if (frag->IsHighSurrogateFollowedByLowSurrogateAt(offs) && + gfxFontUtils::IsRegionalIndicator(frag->ScalarValueAt(offs))) { allowSplitLigature = false; if (extraCluster.GetSkippedOffset() > 1 && !mTextRun->IsLigatureGroupStart(extraCluster.GetSkippedOffset())) { @@ -7732,8 +7728,7 @@ static bool IsAcceptableCaretPosition(const gfxSkipCharsIterator& aIter, uint32_t ch = frag->CharAt(offs); if (gfxFontUtils::IsVarSelector(ch) || - (NS_IS_LOW_SURROGATE(ch) && offs > 0 && - NS_IS_HIGH_SURROGATE(frag->CharAt(offs - 1))) || + frag->IsLowSurrogateFollowingHighSurrogateAt(offs) || (!aTextRun->IsLigatureGroupStart(index) && (unicode::GetEmojiPresentation(ch) == unicode::EmojiDefault || (unicode::GetEmojiPresentation(ch) == unicode::TextDefault && @@ -7744,17 +7739,15 @@ static bool IsAcceptableCaretPosition(const gfxSkipCharsIterator& aIter, // If the proposed position is before a high surrogate, we need to decode // the surrogate pair (if valid) and check the resulting character. - if (NS_IS_HIGH_SURROGATE(ch) && offs + 1 < frag->GetLength()) { - uint32_t ch2 = frag->CharAt(offs + 1); - if (NS_IS_LOW_SURROGATE(ch2)) { - ch = SURROGATE_TO_UCS4(ch, ch2); + if (NS_IS_HIGH_SURROGATE(ch)) { + if (char32_t ucs4 = frag->ScalarValueAt(offs)) { // If the character is a (Plane-14) variation selector, // or an emoji character that is ligated with the previous // character (i.e. part of a Regional-Indicator flag pair, // or an emoji-ZWJ sequence), this is not a valid boundary. - if (gfxFontUtils::IsVarSelector(ch) || + if (gfxFontUtils::IsVarSelector(ucs4) || (!aTextRun->IsLigatureGroupStart(index) && - unicode::GetEmojiPresentation(ch) == unicode::EmojiDefault)) { + unicode::GetEmojiPresentation(ucs4) == unicode::EmojiDefault)) { return false; } } @@ -7861,9 +7854,7 @@ bool ClusterIterator::IsPunctuation() { } int32_t ClusterIterator::GetAfterInternal() { - if (mFrag->Is2b() && NS_IS_HIGH_SURROGATE(mFrag->Get2b()[mCharIndex]) && - uint32_t(mCharIndex) + 1 < mFrag->GetLength() && - NS_IS_LOW_SURROGATE(mFrag->Get2b()[mCharIndex + 1])) { + if (mFrag->IsHighSurrogateFollowedByLowSurrogateAt(mCharIndex)) { return mCharIndex + 2; } return mCharIndex + 1; @@ -7928,16 +7919,14 @@ ClusterIterator::ClusterIterator(nsTextFrame* aTextFrame, int32_t aPosition, nsString maskedText; maskedText.SetCapacity(mFrag->GetLength()); for (uint32_t i = 0; i < mFrag->GetLength(); ++i) { - uint32_t ch = mFrag->CharAt(i); mIterator.SetOriginalOffset(i); uint32_t skippedOffset = mIterator.GetSkippedOffset(); - if (NS_IS_HIGH_SURROGATE(ch) && i < mFrag->GetLength() - 1 && - NS_IS_LOW_SURROGATE(mFrag->CharAt(i + 1))) { + if (mFrag->IsHighSurrogateFollowedByLowSurrogateAt(i)) { if (transformedTextRun->mStyles[skippedOffset]->mMaskPassword) { maskedText.Append(kPasswordMask); maskedText.Append(kPasswordMask); } else { - maskedText.Append(ch); + maskedText.Append(mFrag->CharAt(i)); maskedText.Append(mFrag->CharAt(i + 1)); } ++i; @@ -7945,7 +7934,7 @@ ClusterIterator::ClusterIterator(nsTextFrame* aTextFrame, int32_t aPosition, maskedText.Append( transformedTextRun->mStyles[skippedOffset]->mMaskPassword ? kPasswordMask - : ch); + : mFrag->CharAt(i)); } } mMaskedFrag.SetTo(maskedText, mFrag->IsBidi(), true); @@ -8085,7 +8074,8 @@ static int32_t FindEndOfPunctuationRun(const nsTextFragment* aFrag, int32_t i; for (i = aStart; i < aEnd - aOffset; ++i) { - if (nsContentUtils::IsFirstLetterPunctuationAt(aFrag, aOffset + i)) { + if (nsContentUtils::IsFirstLetterPunctuation( + aFrag->ScalarValueAt(aOffset + i))) { aIter->SetOriginalOffset(aOffset + i); FindClusterEnd(aTextRun, aEnd, aIter); i = aIter->GetOriginalOffset() - aOffset;