Bug 339066. Update inline spellchecking to extract words from the DOM (augmented with style data), handling words that span DOM nodes. Fixes many issues. patch by brettw, roc; r=brettw,roc,mscott,sr=bryner

2006-07-04 00:32:34 +00:00 · 2006-07-04 00:32:34 +00:00 · 09cd0b42c9
--- a/extensions/spellcheck/src/Makefile.in
+++ b/extensions/spellcheck/src/Makefile.in
@ -75,6 +75,7 @@ CPPSRCS		= \
 		mozGenericWordUtils.cpp		\
 		mozSpellI18NManager.cpp		\
 		mozInlineSpellChecker.cpp	\
+		mozInlineSpellWordUtil.cpp      \
 		$(NULL)

 EXTRA_DSO_LDOPTS = \
--- a/extensions/spellcheck/src/mozInlineSpellChecker.cpp
+++ b/extensions/spellcheck/src/mozInlineSpellChecker.cpp
--- a/extensions/spellcheck/src/mozInlineSpellChecker.h
+++ b/extensions/spellcheck/src/mozInlineSpellChecker.h
@ -20,6 +20,7 @@
 *
 * Contributor(s): Neil Deakin (neil@mozdevgroup.com)
 *                 Scott MacGregor (mscott@mozilla.org)
+ *                 Brett Wilson <brettw@gmail.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
@ -51,6 +52,7 @@
 #include "mozISpellI18NUtil.h"

 class nsIDOMMouseEventListener;
+class mozInlineSpellWordUtil;

 class mozInlineSpellChecker : public nsIInlineSpellChecker, nsIEditActionListener, nsIDOMMouseListener, nsIDOMKeyListener,
                                     nsSupportsWeakReference
@ -160,13 +162,14 @@ public:
  // or an e-mail signature...)
  nsresult SkipSpellCheckForNode(nsIDOMNode *aNode, PRBool * aCheckSpelling);

-  nsresult AdjustSpellHighlighting(nsIDOMNode *aNode,
-                                   PRInt32 aOffset,
-                                   nsISelection *aSpellCheckSelection,
-                                   PRBool isDeletion);
+  nsresult SpellCheckAfterChange(nsIDOMNode* aCursorNode, PRInt32 aCursorOffset,
+                                 nsIDOMNode* aPreviousNode, PRInt32 aPreviousOffset,
+                                 nsISelection* aSpellCheckSelection);

  // spell check  the text contained within aRange
-  nsresult SpellCheckRange(nsIDOMRange *aRange, nsISelection *aSpellCheckSelection);
+  nsresult DoSpellCheck(mozInlineSpellWordUtil& aWordUtil,
+                        nsIDOMRange *aRange, nsIDOMRange* aNoCheckRange,
+                        nsISelection *aSpellCheckSelection);

  // helper routine to determine if a point is inside of a the passed in selection.
  nsresult IsPointInSelection(nsISelection *aSelection,
@ -175,7 +178,6 @@ public:
                              nsIDOMRange **aRange);
  
  nsresult CleanupRangesInSelection(nsISelection *aSelection);
-  nsresult RemoveCurrentWordFromSpellSelection(nsISelection *aSpellCheckSelection, nsIDOMRange * aWordRange);

  // helper routines used to control how many ranges we allow into the spell check selection
  nsresult RemoveRange(nsISelection *aSpellCheckSelection, nsIDOMRange * aRange);
@ -187,15 +189,8 @@ public:
  nsresult UnregisterEventListeners();
  nsresult HandleNavigationEvent(nsIDOMEvent * aEvent, PRBool aForceWordSpellCheck, PRInt32 aNewPositionOffset = 0);

-  // helper routine which expands a point in a text node out to the range for the containing word. 
-  nsresult GenerateRangeForSurroundingWord(nsIDOMNode * aNode, PRInt32 aOffset, nsIDOMRange ** aWordRange);
-
-  // helper routine for determining if aNode/aOffset is an end of word delimiter.
-  PRBool EndOfAWord(nsIDOMNode *aNode, PRInt32 aOffset);
-
  nsresult GetSpellCheckSelection(nsISelection ** aSpellCheckSelection);
  nsresult SaveCurrentSelectionPosition();
-  nsresult EnsureConverter();
 };

 #endif /* __mozinlinespellchecker_h__ */
--- a/extensions/spellcheck/src/mozInlineSpellWordUtil.cpp
+++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.cpp
--- a/extensions/spellcheck/src/mozInlineSpellWordUtil.h
+++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.h
@ -0,0 +1,204 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is inline spellchecker code.
+ *
+ * The Initial Developer of the Original Code is Google Inc.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2006
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Brett Wilson <brettw@gmail.com> (original author)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "mozITXTToHTMLConv.h"
+
+#include "nsCOMPtr.h"
+#include "nsIDOMDocument.h"
+#include "nsIDOMDocumentRange.h"
+#include "nsIDOMViewCSS.h"
+#include "nsIDocument.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+//#define DEBUG_SPELLCHECK
+
+class nsIDOMRange;
+class nsIDOMNode;
+
+/**
+ *    This class extracts text from the DOM and builds it into a single string.
+ *    The string includes whitespace breaks whereever non-inline elements begin
+ *    and end. This string is broken into "real words", following somewhat
+ *    complex rules; for example substrings that look like URLs or
+ *    email addresses are treated as single words, but otherwise many kinds of
+ *    punctuation are treated as word separators. GetNextWord provides a way
+ *    to iterate over these "real words".
+ *
+ *    The basic operation is:
+ *
+ *    1. Call Init with the weak pointer to the editor that you're using.
+ *    2. Call SetEnd to set where you want to stop spellchecking. We'll stop
+ *       at the word boundary after that. If SetEnd is not called, we'll stop
+ *       at the end of the document's root element.
+ *    3. Call SetPosition to initialize the current position inside the
+ *       previously given range.
+ *    4. Call GetNextWord over and over until it returns false.
+ */
+
+class mozInlineSpellWordUtil
+{
+public:
+  struct NodeOffset {
+    nsIDOMNode* mNode;
+    PRInt32     mOffset;
+    
+    NodeOffset(nsIDOMNode* aNode, PRInt32 aOffset) :
+      mNode(aNode), mOffset(aOffset) {}
+  };
+
+  mozInlineSpellWordUtil()
+    : mRootNode(nsnull),
+      mSoftBegin(nsnull, 0), mSoftEnd(nsnull, 0),
+      mNextWordIndex(-1), mSoftTextValid(PR_FALSE) {}
+
+  nsresult Init(nsWeakPtr aWeakEditor);
+
+  nsresult SetEnd(nsIDOMNode* aEndNode, PRInt32 aEndOffset);
+
+  // expands the current range to incorporate this new range
+  nsresult ExpandFor(nsIDOMNode* aBeginNode, PRInt32 aBeginOffset,
+                     nsIDOMNode* aEndNode, PRInt32 aEndOffset);
+
+  // sets the current position, this should be inside the range. If we are in
+  // the middle of a word, we'll move to its start.
+  nsresult SetPosition(nsIDOMNode* aNode, PRInt32 aOffset);
+
+  // Given a point inside or immediately following a word, this returns the
+  // DOM range that exactly encloses that word's characters. The current
+  // position will be at the end of the word. This will find the previous
+  // word if the current position is space, so if you care that the point is
+  // inside the word, you should check the range.
+  //
+  // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
+  // before you actually generate the range you are interested in and iterate
+  // the words in it.
+  nsresult GetRangeForWord(nsIDOMNode* aWordNode, PRInt32 aWordOffset,
+                           nsIDOMRange** aRange);
+
+  // Moves to the the next word in the range, and retrieves it's text and range.
+  // An empty word and a NULL range are returned when we are done checking.
+  // aSkipChecking will be set if the word is "special" and shouldn't be
+  // checked (e.g., an email address).
+  nsresult GetNextWord(nsAString& aText, nsIDOMRange** aRange,
+                       PRBool* aSkipChecking);
+
+  // Call to normalize some punctuation. This function takes an autostring
+  // so we can access characters directly.
+  static void NormalizeWord(nsSubstring& aWord);
+
+  nsIDOMDocumentRange* GetDocumentRange() const { return mDOMDocumentRange; }
+  nsIDocument* GetDocument() const { return mDocument; }
+
+private:
+
+  // cached stuff for the editor, set by Init
+  nsCOMPtr<nsIDOMDocumentRange> mDOMDocumentRange;
+  nsCOMPtr<nsIDocument>         mDocument;
+  nsCOMPtr<nsIDOMViewCSS>       mCSSView;
+
+  // range to check, see SetRange
+  nsIDOMNode* mRootNode;
+  NodeOffset  mSoftBegin;
+  NodeOffset  mSoftEnd;
+
+  // lazily created, may be NULL
+  nsCOMPtr<mozITXTToHTMLConv> mURLDetector;
+
+  // DOM text covering the soft range, with newlines added at block boundaries
+  nsString mSoftText;
+  // A list of where we extracted text from, ordered by mSoftTextOffset. A given
+  // DOM node appears at most once in this list.
+  struct DOMTextMapping {
+    NodeOffset mNodeOffset;
+    PRInt32    mSoftTextOffset;
+    PRInt32    mLength;
+    
+    DOMTextMapping(NodeOffset aNodeOffset, PRInt32 aSoftTextOffset, PRInt32 aLength)
+      : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),
+        mLength(aLength) {}
+  };
+  nsTArray<DOMTextMapping> mSoftTextDOMMapping;
+  
+  // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
+  struct RealWord {
+    PRInt32      mSoftTextOffset;
+    PRInt32      mLength;
+    PRPackedBool mCheckableWord;
+    
+    RealWord(PRInt32 aOffset, PRInt32 aLength, PRPackedBool aCheckable)
+      : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {}
+    PRInt32 EndOffset() const { return mSoftTextOffset + mLength; }
+  };
+  nsTArray<RealWord> mRealWords;
+  PRInt32            mNextWordIndex;
+
+  PRPackedBool mSoftTextValid;
+
+  void InvalidateWords() { mSoftTextValid = PR_FALSE; }
+  void EnsureWords();
+  
+  PRInt32 MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset);
+  // Map an offset into mSoftText to a DOM position. Note that two DOM positions
+  // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
+  // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
+  // aHintBefore controls which position we return ... if aHint is eEnd
+  // then the position indicates the END of a range so we return (A,4). Otherwise
+  // the position indicates the START of a range so we return (B,0).
+  enum DOMMapHint { HINT_BEGIN, HINT_END };
+  NodeOffset MapSoftTextOffsetToDOMPosition(PRInt32 aSoftTextOffset,
+                                            DOMMapHint aHint);
+  // Finds the index of the real word containing aSoftTextOffset, or -1 if none
+  // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
+  // later word (favouring the assumption that it's the BEGINning of a word),
+  // otherwise return the earlier word (assuming it's the END of a word).
+  // If aSearchForward is true, then if we don't find a word at the given
+  // position, search forward until we do find a word and return that (if found).
+  PRInt32 FindRealWordContaining(PRInt32 aSoftTextOffset, DOMMapHint aHint,
+                                 PRBool aSearchForward);
+    
+  // build mSoftText and mSoftTextDOMMapping
+  void BuildSoftText();
+  // Build mRealWords array
+  void BuildRealWords();
+
+  void SplitDOMWord(PRInt32 aStart, PRInt32 aEnd);
+
+  // Convenience functions, object must be initialized
+  nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsIDOMRange** aRange);
+  nsresult MakeRangeForWord(const RealWord& aWord, nsIDOMRange** aRange);
+};