From 3b3a1815ead706acf4ef883afd2f514ba53d4c57 Mon Sep 17 00:00:00 2001
From: James Teh
Date: Tue, 4 Jun 2024 20:36:42 +0000
Subject: [PATCH] Bug 855184 part 1: Add BOUNDARY_CLUSTER so a11y can query grapheme clusters, AKA user-perceived characters. r=eeejay

Most OS APIs want a cluster when they ask for a "character", except ATK. Rather than altering BOUNDARY_CHAR, I added a new BOUNDARY_CLUSTER. Aside from being less risky and causing less churn, there are cases internally where we want to move a TextLeafPoint by character; e.g. to explicitly move to the next/previous Accessible or to move to the next/previous character in an abstract way without worrying about Accessible boundaries. Calculating clusters is more expensive, so it doesn't make sense to move by cluster in those cases.

Differential Revision: https://phabricator.services.mozilla.com/D212517
---
 accessible/base/TextLeafRange.cpp             | 59 +++++++++++++++++++
 accessible/base/TextLeafRange.h               |  3 +
 accessible/interfaces/nsIAccessibleText.idl   |  6 ++
 accessible/tests/browser/text/browser_text.js | 28 +++++++++
 accessible/tests/mochitest/text.js            |  1 +
 5 files changed, 97 insertions(+)

diff --git a/accessible/base/TextLeafRange.cpp b/accessible/base/TextLeafRange.cpp
index 41d84a770c74..a1cbbd763995 100644
--- a/accessible/base/TextLeafRange.cpp
+++ b/accessible/base/TextLeafRange.cpp
@@ -1126,6 +1126,9 @@ TextLeafPoint TextLeafPoint::FindBoundary(AccessibleTextBoundary aBoundaryType,
         boundary = searchFrom.FindParagraphSameAcc(aDirection, includeOrigin,
                                                    ignoreListItemMarker);
         break;
+      case nsIAccessibleText::BOUNDARY_CLUSTER:
+        boundary = searchFrom.FindClusterSameAcc(aDirection, includeOrigin);
+        break;
       default:
         MOZ_ASSERT_UNREACHABLE();
         break;
@@ -1369,6 +1372,62 @@ TextLeafPoint TextLeafPoint::FindParagraphSameAcc(
   return TextLeafPoint();
 }
 
+TextLeafPoint TextLeafPoint::FindClusterSameAcc(nsDirection aDirection,
+                                                bool aIncludeOrigin) const {
+  // We don't support clusters which cross nodes. We can live with that because
+  // editor doesn't seem to fully support this either.
+  if (aIncludeOrigin && mOffset == 0) {
+    // Since we don't cross nodes, offset 0 always begins a cluster.
+    return *this;
+  }
+  if (aDirection == eDirPrevious) {
+    if (mOffset == 0) {
+      // We can't go back any further.
+      return TextLeafPoint();
+    }
+    if (!aIncludeOrigin && mOffset == 1) {
+      // Since we don't cross nodes, offset 0 always begins a cluster. We can't
+      // take this fast path if aIncludeOrigin is true because offset 1 might
+      // start a cluster, but we don't know that yet.
+      return TextLeafPoint(mAcc, 0);
+    }
+  }
+  nsAutoString text;
+  mAcc->AppendTextTo(text);
+  if (text.IsEmpty()) {
+    return TextLeafPoint();
+  }
+  if (aDirection == eDirNext &&
+      mOffset == static_cast<int32_t>(text.Length())) {
+    return TextLeafPoint();
+  }
+  // There is GraphemeClusterBreakReverseIteratorUtf16, but it "doesn't
+  // handle conjoining Jamo and emoji". Therefore, we must use
+  // GraphemeClusterBreakIteratorUtf16 even when moving backward.
+  // GraphemeClusterBreakIteratorUtf16::Seek() always starts from the beginning
+  // and repeatedly calls Next(), regardless of the seek offset. The best we
+  // can do is call Next() until we find the offset we need.
+  intl::GraphemeClusterBreakIteratorUtf16 iter(text);
+  // Since we don't cross nodes, offset 0 always begins a cluster.
+  int32_t prevCluster = 0;
+  while (Maybe<uint32_t> next = iter.Next()) {
+    int32_t cluster = static_cast<int32_t>(*next);
+    if (aIncludeOrigin && cluster == mOffset) {
+      return *this;
+    }
+    if (aDirection == eDirPrevious) {
+      if (cluster >= mOffset) {
+        return TextLeafPoint(mAcc, prevCluster);
+      }
+      prevCluster = cluster;
+    } else if (cluster > mOffset) {
+      MOZ_ASSERT(aDirection == eDirNext);
+      return TextLeafPoint(mAcc, cluster);
+    }
+  }
+  return TextLeafPoint();
+}
+
 bool TextLeafPoint::IsInSpellingError() const {
   if (LocalAccessible* acc = mAcc->AsLocal()) {
     auto domRanges = FindDOMSpellingErrors(acc, mOffset, mOffset + 1);
diff --git a/accessible/base/TextLeafRange.h b/accessible/base/TextLeafRange.h
index 23fea2ecfba2..1df0693eadc3 100644
--- a/accessible/base/TextLeafRange.h
+++ b/accessible/base/TextLeafRange.h
@@ -228,6 +228,9 @@ class TextLeafPoint final {
                                      bool aIncludeOrigin,
                                      bool aIgnoreListItemMarker = false) const;
 
+  TextLeafPoint FindClusterSameAcc(nsDirection aDirection,
+                                   bool aIncludeOrigin) const;
+
   bool IsInSpellingError() const;
 
   /**
diff --git a/accessible/interfaces/nsIAccessibleText.idl b/accessible/interfaces/nsIAccessibleText.idl
index 5bd125c30467..1a7d19753747 100644
--- a/accessible/interfaces/nsIAccessibleText.idl
+++ b/accessible/interfaces/nsIAccessibleText.idl
@@ -22,6 +22,8 @@ interface nsIAccessibleText : nsISupports
   const int32_t TEXT_OFFSET_END_OF_TEXT = -1;
   const int32_t TEXT_OFFSET_CARET = -2;
 
+  // A single Unicode character. For a user-perceived character, see
+  // BOUNDARY_CLUSTER.
   const AccessibleTextBoundary BOUNDARY_CHAR = 0;
   const AccessibleTextBoundary BOUNDARY_WORD_START = 1;
   const AccessibleTextBoundary BOUNDARY_WORD_END = 2;
@@ -30,6 +32,10 @@ interface nsIAccessibleText : nsISupports
   const AccessibleTextBoundary BOUNDARY_LINE_START = 5;
   const AccessibleTextBoundary BOUNDARY_LINE_END = 6;
   const AccessibleTextBoundary BOUNDARY_PARAGRAPH = 7;
+  // A grapheme cluster, AKA user-perceived character. This might consist of
+  // multiple Unicode characters, but a user will perceive this as a single
+  // character and it is treated as such by the caret, selection, etc.
+  const AccessibleTextBoundary BOUNDARY_CLUSTER = 8;
 
   /**
    * The current current caret offset.
diff --git a/accessible/tests/browser/text/browser_text.js b/accessible/tests/browser/text/browser_text.js
index ce1d19bc5d9f..8cc22b4aa44a 100644
--- a/accessible/tests/browser/text/browser_text.js
+++ b/accessible/tests/browser/text/browser_text.js
@@ -333,3 +333,31 @@ addAccessibleTask(
     remoteIframe: true,
   }
 );
+
+/**
+ * Test cluster offsets.
+ */
+addAccessibleTask(
+  `<p id="clusters">À2🤦‍♂️🤦🏼‍♂️5x͇͕̦̍͂͒7È</p>`,
+  async function testCluster(browser, docAcc) {
+    const clusters = findAccessibleChildByID(docAcc, "clusters");
+    testCharacterCount(clusters, 26);
+    testTextAtOffset(clusters, BOUNDARY_CLUSTER, [
+      [0, 1, "À", 0, 2],
+      [2, 2, "2", 2, 3],
+      [3, 7, "🤦‍♂️", 3, 8],
+      [8, 14, "🤦🏼‍♂️", 8, 15],
+      [15, 15, "5", 15, 16],
+      [16, 22, "x͇͕̦̍͂͒", 16, 23],
+      [23, 23, "7", 23, 24],
+      [24, 25, "È", 24, 26],
+      [26, 26, "", 26, 26],
+    ]);
+    // Ensure that BOUNDARY_CHAR returns single Unicode characters.
+    testTextAtOffset(clusters, BOUNDARY_CHAR, [
+      [0, 0, "A", 0, 1],
+      [1, 1, "̀", 1, 2],
+    ]);
+  },
+  { chrome: true, topLevel: true }
+);
diff --git a/accessible/tests/mochitest/text.js b/accessible/tests/mochitest/text.js
index 6fe2a00b83e6..5fad7d5ebbb5 100644
--- a/accessible/tests/mochitest/text.js
+++ b/accessible/tests/mochitest/text.js
@@ -9,6 +9,7 @@ const BOUNDARY_WORD_END = nsIAccessibleText.BOUNDARY_WORD_END;
 const BOUNDARY_LINE_START = nsIAccessibleText.BOUNDARY_LINE_START;
 const BOUNDARY_LINE_END = nsIAccessibleText.BOUNDARY_LINE_END;
 const BOUNDARY_PARAGRAPH = nsIAccessibleText.BOUNDARY_PARAGRAPH;
+const BOUNDARY_CLUSTER = nsIAccessibleText.BOUNDARY_CLUSTER;
 
 const kTextEndOffset = nsIAccessibleText.TEXT_OFFSET_END_OF_TEXT;
 const kCaretOffset = nsIAccessibleText.TEXT_OFFSET_CARET;